Explorar el Código

Update libTheora 1.1.1

LuisAntonRebollo hace 11 años
padre
commit
d9fc3abaa4
Se han modificado 100 ficheros con 12538 adiciones y 27313 borrados
  1. 62 0
      Engine/lib/libtheora/CHANGES
  2. 2 2
      Engine/lib/libtheora/COPYING
  3. 18 0
      Engine/lib/libtheora/LICENSE
  4. 3 0
      Engine/lib/libtheora/include/Makefile.am
  5. 414 0
      Engine/lib/libtheora/include/Makefile.in
  6. 7 0
      Engine/lib/libtheora/include/theora/Makefile.am
  7. 355 0
      Engine/lib/libtheora/include/theora/Makefile.in
  8. 26 29
      Engine/lib/libtheora/include/theora/codec.h
  9. 47 57
      Engine/lib/libtheora/include/theora/theora.h
  10. 19 1
      Engine/lib/libtheora/include/theora/theoradec.h
  11. 244 24
      Engine/lib/libtheora/include/theora/theoraenc.h
  12. 173 0
      Engine/lib/libtheora/lib/Makefile.am
  13. 845 0
      Engine/lib/libtheora/lib/Makefile.in
  14. 53 0
      Engine/lib/libtheora/lib/Version_script
  15. 82 0
      Engine/lib/libtheora/lib/Version_script-dec
  16. 43 0
      Engine/lib/libtheora/lib/Version_script-enc
  17. 2709 0
      Engine/lib/libtheora/lib/analyze.c
  18. 10 10
      Engine/lib/libtheora/lib/apiwrapper.c
  19. 3 4
      Engine/lib/libtheora/lib/apiwrapper.h
  20. 111 0
      Engine/lib/libtheora/lib/bitpack.c
  21. 34 13
      Engine/lib/libtheora/lib/bitpack.h
  22. 4 5
      Engine/lib/libtheora/lib/cpu.c
  23. 2 2
      Engine/lib/libtheora/lib/cpu.h
  24. 2 2
      Engine/lib/libtheora/lib/dct.h
  25. 0 121
      Engine/lib/libtheora/lib/dec/bitpack.c
  26. 0 2057
      Engine/lib/libtheora/lib/dec/decode.c
  27. 0 199
      Engine/lib/libtheora/lib/dec/fragment.c
  28. 0 325
      Engine/lib/libtheora/lib/dec/huffdec.c
  29. 0 26
      Engine/lib/libtheora/lib/dec/idct.h
  30. 0 88
      Engine/lib/libtheora/lib/dec/ocintrin.h
  31. 0 122
      Engine/lib/libtheora/lib/dec/quant.c
  32. 0 653
      Engine/lib/libtheora/lib/dec/x86/mmxstate.c
  33. 0 42
      Engine/lib/libtheora/lib/dec/x86/x86int.h
  34. 0 214
      Engine/lib/libtheora/lib/dec/x86_vc/mmxfrag.c
  35. 0 1006
      Engine/lib/libtheora/lib/dec/x86_vc/mmxidct.c
  36. 0 377
      Engine/lib/libtheora/lib/dec/x86_vc/mmxloopfilter.c
  37. 0 189
      Engine/lib/libtheora/lib/dec/x86_vc/mmxstate.c
  38. 0 49
      Engine/lib/libtheora/lib/dec/x86_vc/x86int.h
  39. 9 4
      Engine/lib/libtheora/lib/decapiwrapper.c
  40. 52 45
      Engine/lib/libtheora/lib/decinfo.c
  41. 30 18
      Engine/lib/libtheora/lib/decint.h
  42. 2943 0
      Engine/lib/libtheora/lib/decode.c
  43. 29 17
      Engine/lib/libtheora/lib/dequant.c
  44. 4 3
      Engine/lib/libtheora/lib/dequant.h
  45. 0 37
      Engine/lib/libtheora/lib/enc/block_inline.h
  46. 0 99
      Engine/lib/libtheora/lib/enc/blockmap.c
  47. 0 842
      Engine/lib/libtheora/lib/enc/codec_internal.h
  48. 0 268
      Engine/lib/libtheora/lib/enc/dct.c
  49. 0 941
      Engine/lib/libtheora/lib/enc/dct_decode.c
  50. 0 469
      Engine/lib/libtheora/lib/enc/dct_encode.c
  51. 0 422
      Engine/lib/libtheora/lib/enc/dsp.c
  52. 0 166
      Engine/lib/libtheora/lib/enc/dsp.h
  53. 0 1479
      Engine/lib/libtheora/lib/enc/encode.c
  54. 0 310
      Engine/lib/libtheora/lib/enc/encoder_huffman.c
  55. 0 74
      Engine/lib/libtheora/lib/enc/encoder_huffman.h
  56. 0 572
      Engine/lib/libtheora/lib/enc/encoder_idct.c
  57. 0 120
      Engine/lib/libtheora/lib/enc/encoder_lookup.h
  58. 0 558
      Engine/lib/libtheora/lib/enc/encoder_quant.c
  59. 0 1447
      Engine/lib/libtheora/lib/enc/encoder_toplevel.c
  60. 0 243
      Engine/lib/libtheora/lib/enc/frarray.c
  61. 0 392
      Engine/lib/libtheora/lib/enc/frinit.c
  62. 0 1034
      Engine/lib/libtheora/lib/enc/hufftables.h
  63. 0 767
      Engine/lib/libtheora/lib/enc/mcomp.c
  64. 0 339
      Engine/lib/libtheora/lib/enc/misc_common.c
  65. 0 89
      Engine/lib/libtheora/lib/enc/pb.c
  66. 0 951
      Engine/lib/libtheora/lib/enc/pp.c
  67. 0 48
      Engine/lib/libtheora/lib/enc/pp.h
  68. 0 43
      Engine/lib/libtheora/lib/enc/quant_lookup.h
  69. 0 110
      Engine/lib/libtheora/lib/enc/reconstruct.c
  70. 0 2301
      Engine/lib/libtheora/lib/enc/scan.c
  71. 0 40
      Engine/lib/libtheora/lib/enc/toplevel_lookup.h
  72. 0 409
      Engine/lib/libtheora/lib/enc/x86_32/dct_decode_mmx.c
  73. 0 666
      Engine/lib/libtheora/lib/enc/x86_32/dsp_mmx.c
  74. 0 347
      Engine/lib/libtheora/lib/enc/x86_32/dsp_mmxext.c
  75. 0 339
      Engine/lib/libtheora/lib/enc/x86_32/fdct_mmx.c
  76. 0 1452
      Engine/lib/libtheora/lib/enc/x86_32/idct_mmx.c
  77. 0 182
      Engine/lib/libtheora/lib/enc/x86_32/recon_mmx.c
  78. 0 1605
      Engine/lib/libtheora/lib/enc/x86_32_vs/dsp_mmx.c
  79. 0 333
      Engine/lib/libtheora/lib/enc/x86_32_vs/fdct_mmx.c
  80. 0 197
      Engine/lib/libtheora/lib/enc/x86_32_vs/recon_mmx.c
  81. 0 409
      Engine/lib/libtheora/lib/enc/x86_64/dct_decode_mmx.c
  82. 0 303
      Engine/lib/libtheora/lib/enc/x86_64/dsp_mmx.c
  83. 0 323
      Engine/lib/libtheora/lib/enc/x86_64/dsp_mmxext.c
  84. 0 342
      Engine/lib/libtheora/lib/enc/x86_64/fdct_mmx.c
  85. 0 27
      Engine/lib/libtheora/lib/enc/x86_64/idct_mmx.c
  86. 0 184
      Engine/lib/libtheora/lib/enc/x86_64/recon_mmx.c
  87. 168 0
      Engine/lib/libtheora/lib/encapiwrapper.c
  88. 388 0
      Engine/lib/libtheora/lib/encfrag.c
  89. 121 0
      Engine/lib/libtheora/lib/encinfo.c
  90. 493 0
      Engine/lib/libtheora/lib/encint.h
  91. 1615 0
      Engine/lib/libtheora/lib/encode.c
  92. 274 0
      Engine/lib/libtheora/lib/enquant.c
  93. 27 0
      Engine/lib/libtheora/lib/enquant.h
  94. 422 0
      Engine/lib/libtheora/lib/fdct.c
  95. 87 0
      Engine/lib/libtheora/lib/fragment.c
  96. 489 0
      Engine/lib/libtheora/lib/huffdec.c
  97. 7 6
      Engine/lib/libtheora/lib/huffdec.h
  98. 90 321
      Engine/lib/libtheora/lib/huffenc.c
  99. 19 0
      Engine/lib/libtheora/lib/huffenc.h
  100. 3 3
      Engine/lib/libtheora/lib/huffman.h

+ 62 - 0
Engine/lib/libtheora/CHANGES

@@ -1,3 +1,65 @@
+libtheora 1.1.1 (2009 October 1)
+
+ - Fix problems with MSVC inline assembly
+ - Add the missing encoder_disabled.c to the distribution
+ - build updates: autogen.sh should work better after switching systems
+   and the MSVC project now defaults to the dynamic runtime library
+ - Namespace some variables to avoid conflicts on wince.
+
+libtheora 1.1.0 (2009 September 24)
+
+ - Fix various small issues with the example and telemetry code
+ - Fix handling a zero-byte packet as the first frame
+ - Documentation cleanup
+ - Two minor build fixes
+
+libtheora 1.1beta3 (2009 August 22)
+
+ - Rate control fixes to smooth quality
+ - MSVC build now exports all of the 1.0 api
+ - Assorted small bug fixes
+
+libtheora 1.1beta2 (2009 August 12)
+
+ - Fix a rate control problem with difficult input
+ - Build fixes for OpenBSD and Apple Xcode
+ - Examples now all use the 1.0 api
+ - TH_ENCCTL_SET_SPLEVEL works again
+ - Various bug fixes and source tree rearrangement
+
+libtheora 1.1beta1 (2009 August 5)
+
+ - Support for two-pass encoding
+ - Performance optimization of both encoder and decoder
+ - Encoder supports dynamic adjustment of quality and 
+   bitrate targets
+ - Encoder is generally more configurable, and all
+   rate control modes perform better
+ - Encoder now accepts 4:2:2 and 4:4:4 chroma sampling
+ - Decoder telemetry output shows quantization choice
+   and a breakdown of bitrate usage in the frame
+ - MSVC assembly optimizations up to date and functional
+
+libtheora 1.1alpha2 (2009 May 26)
+
+ - Reduce lambda for small quantizers.
+ - New encoder fDCT does better on smooth gradients
+ - Use SATD for mode decisions (1-2% bitrate reduction)
+ - Assembly rewrite for new features and general speed up
+ - Share code between the encoder and decoder for performance
+ - Fix 4:2:2 decoding and telemetry
+ - MSVC project files updated, but assembly is disabled.
+ - New configure option --disable-spec to work around toolchain
+   detection failures.
+ - Limit symbol exports on MacOS X.
+ - Port remaining unit tests from the 1.0 release.
+
+libtheora 1.1alpha1 (2009 March 27)
+
+ - Encoder rewrite with much improved vbr quality/bitrate and
+   better tracking of the target rate in cbr mode.
+ - MSVC project files do not work in this release.
+
 libtheora 1.0 (2008 November 3)
 
  - Merge x86 assembly for forward DCT from Thusnelda branch.

+ 2 - 2
Engine/lib/libtheora/COPYING

@@ -1,4 +1,4 @@
-Copyright (C) 2002-2008 Xiph.Org Foundation and contributors.
+Copyright (C) 2002-2009 Xiph.org Foundation
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
@@ -11,7 +11,7 @@ notice, this list of conditions and the following disclaimer.
 notice, this list of conditions and the following disclaimer in the
 documentation and/or other materials provided with the distribution.
 
-- Neither the name of the Xiph.Org Foundation nor the names of its
+- Neither the name of the Xiph.org Foundation nor the names of its
 contributors may be used to endorse or promote products derived from
 this software without specific prior written permission.
 

+ 18 - 0
Engine/lib/libtheora/LICENSE

@@ -0,0 +1,18 @@
+Please see the file COPYING for the copyright license for this software.
+
+In addition to and irrespective of the copyright license associated
+with this software, On2 Technologies, Inc. makes the following statement
+regarding technology used in this software:
+
+  On2 represents and warrants that it shall not assert any rights 
+  relating to infringement of On2's registered patents, nor initiate
+  any litigation asserting such rights, against any person who, or
+  entity which utilizes the On2 VP3 Codec Software, including any 
+  use, distribution, and sale of said Software; which make changes, 
+  modifications, and improvements in said Software; and to use,
+  distribute, and sell said changes as well as applications for other 
+  fields of use.
+
+This reference implementation is originally derived from the On2 VP3
+Codec Software, and the Theora video format is essentially compatible
+with the VP3 video format, consisting of a backward-compatible superset.

+ 3 - 0
Engine/lib/libtheora/include/Makefile.am

@@ -0,0 +1,3 @@
+## Process this file with automake to produce Makefile.in
+
+SUBDIRS = theora

+ 414 - 0
Engine/lib/libtheora/include/Makefile.in

@@ -0,0 +1,414 @@
+# Makefile.in generated by automake 1.6.3 from Makefile.am.
+# @configure_input@
+
+# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
+# Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+SHELL = @SHELL@
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+
+bindir = @bindir@
+sbindir = @sbindir@
+libexecdir = @libexecdir@
+datadir = @datadir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+localstatedir = @localstatedir@
+libdir = @libdir@
+infodir = @infodir@
+mandir = @mandir@
+includedir = @includedir@
+oldincludedir = /usr/include
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+top_builddir = ..
+
+ACLOCAL = @ACLOCAL@
+AUTOCONF = @AUTOCONF@
+AUTOMAKE = @AUTOMAKE@
+AUTOHEADER = @AUTOHEADER@
+
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_DATA = @INSTALL_DATA@
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = @program_transform_name@
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+host_alias = @host_alias@
+host_triplet = @host@
+
+EXEEXT = @EXEEXT@
+OBJEXT = @OBJEXT@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+ACLOCAL_AMFLAGS = @ACLOCAL_AMFLAGS@
+AMTAR = @AMTAR@
+AR = @AR@
+ARGZ_H = @ARGZ_H@
+AS = @AS@
+AWK = @AWK@
+BUILDABLE_EXAMPLES = @BUILDABLE_EXAMPLES@
+CAIRO_CFLAGS = @CAIRO_CFLAGS@
+CAIRO_LIBS = @CAIRO_LIBS@
+CC = @CC@
+CPP = @CPP@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+DEBUG = @DEBUG@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+F77 = @F77@
+GCJ = @GCJ@
+GCJFLAGS = @GCJFLAGS@
+GETOPT_OBJS = @GETOPT_OBJS@
+GREP = @GREP@
+HAVE_BIBTEX = @HAVE_BIBTEX@
+HAVE_DOXYGEN = @HAVE_DOXYGEN@
+HAVE_PDFLATEX = @HAVE_PDFLATEX@
+HAVE_PKG_CONFIG = @HAVE_PKG_CONFIG@
+HAVE_TRANSFIG = @HAVE_TRANSFIG@
+HAVE_VALGRIND = @HAVE_VALGRIND@
+INCLTDL = @INCLTDL@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LIBADD_DL = @LIBADD_DL@
+LIBADD_DLD_LINK = @LIBADD_DLD_LINK@
+LIBADD_DLOPEN = @LIBADD_DLOPEN@
+LIBADD_SHL_LOAD = @LIBADD_SHL_LOAD@
+LIBLTDL = @LIBLTDL@
+LIBM = @LIBM@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTDLDEPS = @LTDLDEPS@
+LTDLINCL = @LTDLINCL@
+LTDLOPEN = @LTDLOPEN@
+LT_CONFIG_H = @LT_CONFIG_H@
+LT_DLLOADERS = @LT_DLLOADERS@
+LT_DLPREOPEN = @LT_DLPREOPEN@
+MAINT = @MAINT@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OGG_CFLAGS = @OGG_CFLAGS@
+OGG_LIBS = @OGG_LIBS@
+OSS_LIBS = @OSS_LIBS@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PKG_CONFIG = @PKG_CONFIG@
+PNG_CFLAGS = @PNG_CFLAGS@
+PNG_LIBS = @PNG_LIBS@
+PROFILE = @PROFILE@
+RANLIB = @RANLIB@
+RC = @RC@
+SDL_CFLAGS = @SDL_CFLAGS@
+SDL_CONFIG = @SDL_CONFIG@
+SDL_LIBS = @SDL_LIBS@
+SED = @SED@
+STRIP = @STRIP@
+THDEC_LIB_AGE = @THDEC_LIB_AGE@
+THDEC_LIB_CURRENT = @THDEC_LIB_CURRENT@
+THDEC_LIB_REVISION = @THDEC_LIB_REVISION@
+THENC_LIB_AGE = @THENC_LIB_AGE@
+THENC_LIB_CURRENT = @THENC_LIB_CURRENT@
+THENC_LIB_REVISION = @THENC_LIB_REVISION@
+THEORADEC_LDFLAGS = @THEORADEC_LDFLAGS@
+THEORAENC_LDFLAGS = @THEORAENC_LDFLAGS@
+THEORA_LDFLAGS = @THEORA_LDFLAGS@
+TH_LIB_AGE = @TH_LIB_AGE@
+TH_LIB_CURRENT = @TH_LIB_CURRENT@
+TH_LIB_REVISION = @TH_LIB_REVISION@
+VALGRIND_ENVIRONMENT = @VALGRIND_ENVIRONMENT@
+VERSION = @VERSION@
+VORBISENC_LIBS = @VORBISENC_LIBS@
+VORBISFILE_LIBS = @VORBISFILE_LIBS@
+VORBIS_CFLAGS = @VORBIS_CFLAGS@
+VORBIS_LIBS = @VORBIS_LIBS@
+am__include = @am__include@
+am__quote = @am__quote@
+install_sh = @install_sh@
+lt_ECHO = @lt_ECHO@
+ltdl_LIBOBJS = @ltdl_LIBOBJS@
+ltdl_LTLIBOBJS = @ltdl_LTLIBOBJS@
+sys_symbol_underscore = @sys_symbol_underscore@
+
+SUBDIRS = theora
+subdir = include
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+DIST_SOURCES =
+
+RECURSIVE_TARGETS = info-recursive dvi-recursive install-info-recursive \
+	uninstall-info-recursive all-recursive install-data-recursive \
+	install-exec-recursive installdirs-recursive install-recursive \
+	uninstall-recursive check-recursive installcheck-recursive
+DIST_COMMON = Makefile.am Makefile.in
+DIST_SUBDIRS = $(SUBDIRS)
+all: all-recursive
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am  $(top_srcdir)/configure.ac $(ACLOCAL_M4)
+	cd $(top_srcdir) && \
+	  $(AUTOMAKE) --gnu  include/Makefile
+Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in  $(top_builddir)/config.status
+	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+
+distclean-libtool:
+	-rm -f libtool
+uninstall-info-am:
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run `make' without going through this Makefile.
+# To change the values of `make' variables: instead of editing Makefiles,
+# (1) if the variable is set in `config.status', edit `config.status'
+#     (which will cause the Makefiles to be regenerated when you run `make');
+# (2) otherwise, pass the desired values on the `make' command line.
+$(RECURSIVE_TARGETS):
+	@set fnord $$MAKEFLAGS; amf=$$2; \
+	dot_seen=no; \
+	target=`echo $@ | sed s/-recursive//`; \
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  echo "Making $$target in $$subdir"; \
+	  if test "$$subdir" = "."; then \
+	    dot_seen=yes; \
+	    local_target="$$target-am"; \
+	  else \
+	    local_target="$$target"; \
+	  fi; \
+	  (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+	   || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \
+	done; \
+	if test "$$dot_seen" = "no"; then \
+	  $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+	fi; test -z "$$fail"
+
+mostlyclean-recursive clean-recursive distclean-recursive \
+maintainer-clean-recursive:
+	@set fnord $$MAKEFLAGS; amf=$$2; \
+	dot_seen=no; \
+	case "$@" in \
+	  distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+	  *) list='$(SUBDIRS)' ;; \
+	esac; \
+	rev=''; for subdir in $$list; do \
+	  if test "$$subdir" = "."; then :; else \
+	    rev="$$subdir $$rev"; \
+	  fi; \
+	done; \
+	rev="$$rev ."; \
+	target=`echo $@ | sed s/-recursive//`; \
+	for subdir in $$rev; do \
+	  echo "Making $$target in $$subdir"; \
+	  if test "$$subdir" = "."; then \
+	    local_target="$$target-am"; \
+	  else \
+	    local_target="$$target"; \
+	  fi; \
+	  (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+	   || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \
+	done && test -z "$$fail"
+tags-recursive:
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
+	done
+
+ETAGS = etags
+ETAGSFLAGS =
+
+tags: TAGS
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '    { files[$$0] = 1; } \
+	       END { for (i in files) print i; }'`; \
+	mkid -fID $$unique
+
+TAGS: tags-recursive $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	tags=; \
+	here=`pwd`; \
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    test -f $$subdir/TAGS && tags="$$tags -i $$here/$$subdir/TAGS"; \
+	  fi; \
+	done; \
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '    { files[$$0] = 1; } \
+	       END { for (i in files) print i; }'`; \
+	test -z "$(ETAGS_ARGS)$$tags$$unique" \
+	  || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	     $$tags $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && cd $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+
+top_distdir = ..
+distdir = $(top_distdir)/$(PACKAGE)-$(VERSION)
+
+distdir: $(DISTFILES)
+	@list='$(DISTFILES)'; for file in $$list; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
+	  if test "$$dir" != "$$file" && test "$$dir" != "."; then \
+	    dir="/$$dir"; \
+	    $(mkinstalldirs) "$(distdir)$$dir"; \
+	  else \
+	    dir=''; \
+	  fi; \
+	  if test -d $$d/$$file; then \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+	    fi; \
+	    cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+	  else \
+	    test -f $(distdir)/$$file \
+	    || cp -p $$d/$$file $(distdir)/$$file \
+	    || exit 1; \
+	  fi; \
+	done
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    test -d $(distdir)/$$subdir \
+	    || mkdir $(distdir)/$$subdir \
+	    || exit 1; \
+	    (cd $$subdir && \
+	      $(MAKE) $(AM_MAKEFLAGS) \
+	        top_distdir="$(top_distdir)" \
+	        distdir=../$(distdir)/$$subdir \
+	        distdir) \
+	      || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-recursive
+all-am: Makefile
+installdirs: installdirs-recursive
+installdirs-am:
+
+install: install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-recursive
+install-strip:
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	  INSTALL_STRIP_FLAG=-s \
+	  `test -z '$(STRIP)' || \
+	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-rm -f Makefile $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-recursive
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-recursive
+
+distclean-am: clean-am distclean-generic distclean-libtool \
+	distclean-tags
+
+dvi: dvi-recursive
+
+dvi-am:
+
+info: info-recursive
+
+info-am:
+
+install-data-am:
+
+install-exec-am:
+
+install-info: install-info-recursive
+
+install-man:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-recursive
+
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-recursive
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+uninstall-am: uninstall-info-am
+
+uninstall-info: uninstall-info-recursive
+
+.PHONY: $(RECURSIVE_TARGETS) GTAGS all all-am check check-am clean \
+	clean-generic clean-libtool clean-recursive distclean \
+	distclean-generic distclean-libtool distclean-recursive \
+	distclean-tags distdir dvi dvi-am dvi-recursive info info-am \
+	info-recursive install install-am install-data install-data-am \
+	install-data-recursive install-exec install-exec-am \
+	install-exec-recursive install-info install-info-am \
+	install-info-recursive install-man install-recursive \
+	install-strip installcheck installcheck-am installdirs \
+	installdirs-am installdirs-recursive maintainer-clean \
+	maintainer-clean-generic maintainer-clean-recursive mostlyclean \
+	mostlyclean-generic mostlyclean-libtool mostlyclean-recursive \
+	tags tags-recursive uninstall uninstall-am uninstall-info-am \
+	uninstall-info-recursive uninstall-recursive
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:

+ 7 - 0
Engine/lib/libtheora/include/theora/Makefile.am

@@ -0,0 +1,7 @@
+## Process this file with automake to produce Makefile.in
+
+theoraincludedir = $(includedir)/theora
+
+theorainclude_HEADERS = theora.h theoradec.h theoraenc.h codec.h
+
+noinst_HEADERS = codec.h theoradec.h

+ 355 - 0
Engine/lib/libtheora/include/theora/Makefile.in

@@ -0,0 +1,355 @@
+# Makefile.in generated by automake 1.6.3 from Makefile.am.
+# @configure_input@
+
+# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
+# Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+SHELL = @SHELL@
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+
+bindir = @bindir@
+sbindir = @sbindir@
+libexecdir = @libexecdir@
+datadir = @datadir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+localstatedir = @localstatedir@
+libdir = @libdir@
+infodir = @infodir@
+mandir = @mandir@
+includedir = @includedir@
+oldincludedir = /usr/include
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+top_builddir = ../..
+
+ACLOCAL = @ACLOCAL@
+AUTOCONF = @AUTOCONF@
+AUTOMAKE = @AUTOMAKE@
+AUTOHEADER = @AUTOHEADER@
+
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_DATA = @INSTALL_DATA@
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = @program_transform_name@
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+host_alias = @host_alias@
+host_triplet = @host@
+
+EXEEXT = @EXEEXT@
+OBJEXT = @OBJEXT@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+ACLOCAL_AMFLAGS = @ACLOCAL_AMFLAGS@
+AMTAR = @AMTAR@
+AR = @AR@
+ARGZ_H = @ARGZ_H@
+AS = @AS@
+AWK = @AWK@
+BUILDABLE_EXAMPLES = @BUILDABLE_EXAMPLES@
+CAIRO_CFLAGS = @CAIRO_CFLAGS@
+CAIRO_LIBS = @CAIRO_LIBS@
+CC = @CC@
+CPP = @CPP@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+DEBUG = @DEBUG@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+F77 = @F77@
+GCJ = @GCJ@
+GCJFLAGS = @GCJFLAGS@
+GETOPT_OBJS = @GETOPT_OBJS@
+GREP = @GREP@
+HAVE_BIBTEX = @HAVE_BIBTEX@
+HAVE_DOXYGEN = @HAVE_DOXYGEN@
+HAVE_PDFLATEX = @HAVE_PDFLATEX@
+HAVE_PKG_CONFIG = @HAVE_PKG_CONFIG@
+HAVE_TRANSFIG = @HAVE_TRANSFIG@
+HAVE_VALGRIND = @HAVE_VALGRIND@
+INCLTDL = @INCLTDL@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LIBADD_DL = @LIBADD_DL@
+LIBADD_DLD_LINK = @LIBADD_DLD_LINK@
+LIBADD_DLOPEN = @LIBADD_DLOPEN@
+LIBADD_SHL_LOAD = @LIBADD_SHL_LOAD@
+LIBLTDL = @LIBLTDL@
+LIBM = @LIBM@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTDLDEPS = @LTDLDEPS@
+LTDLINCL = @LTDLINCL@
+LTDLOPEN = @LTDLOPEN@
+LT_CONFIG_H = @LT_CONFIG_H@
+LT_DLLOADERS = @LT_DLLOADERS@
+LT_DLPREOPEN = @LT_DLPREOPEN@
+MAINT = @MAINT@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OGG_CFLAGS = @OGG_CFLAGS@
+OGG_LIBS = @OGG_LIBS@
+OSS_LIBS = @OSS_LIBS@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PKG_CONFIG = @PKG_CONFIG@
+PNG_CFLAGS = @PNG_CFLAGS@
+PNG_LIBS = @PNG_LIBS@
+PROFILE = @PROFILE@
+RANLIB = @RANLIB@
+RC = @RC@
+SDL_CFLAGS = @SDL_CFLAGS@
+SDL_CONFIG = @SDL_CONFIG@
+SDL_LIBS = @SDL_LIBS@
+SED = @SED@
+STRIP = @STRIP@
+THDEC_LIB_AGE = @THDEC_LIB_AGE@
+THDEC_LIB_CURRENT = @THDEC_LIB_CURRENT@
+THDEC_LIB_REVISION = @THDEC_LIB_REVISION@
+THENC_LIB_AGE = @THENC_LIB_AGE@
+THENC_LIB_CURRENT = @THENC_LIB_CURRENT@
+THENC_LIB_REVISION = @THENC_LIB_REVISION@
+THEORADEC_LDFLAGS = @THEORADEC_LDFLAGS@
+THEORAENC_LDFLAGS = @THEORAENC_LDFLAGS@
+THEORA_LDFLAGS = @THEORA_LDFLAGS@
+TH_LIB_AGE = @TH_LIB_AGE@
+TH_LIB_CURRENT = @TH_LIB_CURRENT@
+TH_LIB_REVISION = @TH_LIB_REVISION@
+VALGRIND_ENVIRONMENT = @VALGRIND_ENVIRONMENT@
+VERSION = @VERSION@
+VORBISENC_LIBS = @VORBISENC_LIBS@
+VORBISFILE_LIBS = @VORBISFILE_LIBS@
+VORBIS_CFLAGS = @VORBIS_CFLAGS@
+VORBIS_LIBS = @VORBIS_LIBS@
+am__include = @am__include@
+am__quote = @am__quote@
+install_sh = @install_sh@
+lt_ECHO = @lt_ECHO@
+ltdl_LIBOBJS = @ltdl_LIBOBJS@
+ltdl_LTLIBOBJS = @ltdl_LTLIBOBJS@
+sys_symbol_underscore = @sys_symbol_underscore@
+
+theoraincludedir = $(includedir)/theora
+
+theorainclude_HEADERS = theora.h theoradec.h theoraenc.h codec.h
+
+noinst_HEADERS = codec.h theoradec.h
+subdir = include/theora
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+DIST_SOURCES =
+HEADERS = $(noinst_HEADERS) $(theorainclude_HEADERS)
+
+DIST_COMMON = $(noinst_HEADERS) $(theorainclude_HEADERS) Makefile.am \
+	Makefile.in
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am  $(top_srcdir)/configure.ac $(ACLOCAL_M4)
+	cd $(top_srcdir) && \
+	  $(AUTOMAKE) --gnu  include/theora/Makefile
+Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in  $(top_builddir)/config.status
+	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+
+distclean-libtool:
+	-rm -f libtool
+uninstall-info-am:
+theoraincludeHEADERS_INSTALL = $(INSTALL_HEADER)
+install-theoraincludeHEADERS: $(theorainclude_HEADERS)
+	@$(NORMAL_INSTALL)
+	$(mkinstalldirs) $(DESTDIR)$(theoraincludedir)
+	@list='$(theorainclude_HEADERS)'; for p in $$list; do \
+	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+	  f="`echo $$p | sed -e 's|^.*/||'`"; \
+	  echo " $(theoraincludeHEADERS_INSTALL) $$d$$p $(DESTDIR)$(theoraincludedir)/$$f"; \
+	  $(theoraincludeHEADERS_INSTALL) $$d$$p $(DESTDIR)$(theoraincludedir)/$$f; \
+	done
+
+uninstall-theoraincludeHEADERS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(theorainclude_HEADERS)'; for p in $$list; do \
+	  f="`echo $$p | sed -e 's|^.*/||'`"; \
+	  echo " rm -f $(DESTDIR)$(theoraincludedir)/$$f"; \
+	  rm -f $(DESTDIR)$(theoraincludedir)/$$f; \
+	done
+
+ETAGS = etags
+ETAGSFLAGS =
+
+tags: TAGS
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '    { files[$$0] = 1; } \
+	       END { for (i in files) print i; }'`; \
+	mkid -fID $$unique
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	tags=; \
+	here=`pwd`; \
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '    { files[$$0] = 1; } \
+	       END { for (i in files) print i; }'`; \
+	test -z "$(ETAGS_ARGS)$$tags$$unique" \
+	  || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	     $$tags $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && cd $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+
+top_distdir = ../..
+distdir = $(top_distdir)/$(PACKAGE)-$(VERSION)
+
+distdir: $(DISTFILES)
+	@list='$(DISTFILES)'; for file in $$list; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
+	  if test "$$dir" != "$$file" && test "$$dir" != "."; then \
+	    dir="/$$dir"; \
+	    $(mkinstalldirs) "$(distdir)$$dir"; \
+	  else \
+	    dir=''; \
+	  fi; \
+	  if test -d $$d/$$file; then \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+	    fi; \
+	    cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+	  else \
+	    test -f $(distdir)/$$file \
+	    || cp -p $$d/$$file $(distdir)/$$file \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(HEADERS)
+
+installdirs:
+	$(mkinstalldirs) $(DESTDIR)$(theoraincludedir)
+
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	  INSTALL_STRIP_FLAG=-s \
+	  `test -z '$(STRIP)' || \
+	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-rm -f Makefile $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+
+distclean-am: clean-am distclean-generic distclean-libtool \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-theoraincludeHEADERS
+
+install-exec-am:
+
+install-info: install-info-am
+
+install-man:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+uninstall-am: uninstall-info-am uninstall-theoraincludeHEADERS
+
+.PHONY: GTAGS all all-am check check-am clean clean-generic \
+	clean-libtool distclean distclean-generic distclean-libtool \
+	distclean-tags distdir dvi dvi-am info info-am install \
+	install-am install-data install-data-am install-exec \
+	install-exec-am install-info install-info-am install-man \
+	install-strip install-theoraincludeHEADERS installcheck \
+	installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-generic \
+	mostlyclean-libtool tags uninstall uninstall-am \
+	uninstall-info-am uninstall-theoraincludeHEADERS
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:

+ 26 - 29
Engine/lib/libtheora/include/theora/codec.h

@@ -5,7 +5,7 @@
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  *                                                                  *
  *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
  * by the Xiph.Org Foundation http://www.xiph.org/                  *
  * by the Xiph.Org Foundation http://www.xiph.org/                  *
  *                                                                  *
  *                                                                  *
  ********************************************************************
  ********************************************************************
@@ -24,10 +24,10 @@
  * implementation for <a href="http://www.theora.org/">Theora</a>, a free,
  * implementation for <a href="http://www.theora.org/">Theora</a>, a free,
  * patent-unencumbered video codec.
  * patent-unencumbered video codec.
  * Theora is derived from On2's VP3 codec with additional features and
  * Theora is derived from On2's VP3 codec with additional features and
- *  integration for Ogg multimedia formats by
+ *  integration with Ogg multimedia formats by
  *  <a href="http://www.xiph.org/">the Xiph.Org Foundation</a>.
  *  <a href="http://www.xiph.org/">the Xiph.Org Foundation</a>.
  * Complete documentation of the format itself is available in
  * Complete documentation of the format itself is available in
- * <a href="http://www.theora.org/doc/Theora_I_spec.pdf">the Theora
+ * <a href="http://www.theora.org/doc/Theora.pdf">the Theora
  *  specification</a>.
  *  specification</a>.
  *
  *
  * \subsection Organization
  * \subsection Organization
@@ -92,9 +92,9 @@ extern "C" {
 /*@}*/
 /*@}*/
 
 
 /**The currently defined color space tags.
 /**The currently defined color space tags.
- * See <a href="http://www.theora.org/doc/Theora_I_spec.pdf">the Theora
- *  specification</a>, Chapter 4, for exact details on the meaning of each of
- *  these color spaces.*/
+ * See <a href="http://www.theora.org/doc/Theora.pdf">the Theora
+ *  specification</a>, Chapter 4, for exact details on the meaning
+ *  of each of these color spaces.*/
 typedef enum{
 typedef enum{
   /**The color space was not specified at the encoder.
   /**The color space was not specified at the encoder.
       It may be conveyed by an external means.*/
       It may be conveyed by an external means.*/
@@ -108,13 +108,13 @@ typedef enum{
 }th_colorspace;
 }th_colorspace;
 
 
 /**The currently defined pixel format tags.
 /**The currently defined pixel format tags.
- * See <a href="http://www.theora.org/doc/Theora_I_spec.pdf">the Theora
+ * See <a href="http://www.theora.org/doc/Theora.pdf">the Theora
  *  specification</a>, Section 4.4, for details on the precise sample
  *  specification</a>, Section 4.4, for details on the precise sample
  *  locations.*/
  *  locations.*/
 typedef enum{
 typedef enum{
   /**Chroma decimation by 2 in both the X and Y directions (4:2:0).
   /**Chroma decimation by 2 in both the X and Y directions (4:2:0).
-     The Cb and Cr chroma planes are half the width and half the height of the
-      luma plane.*/
+     The Cb and Cr chroma planes are half the width and half the
+      height of the luma plane.*/
   TH_PF_420,
   TH_PF_420,
   /**Currently reserved.*/
   /**Currently reserved.*/
   TH_PF_RSVD,
   TH_PF_RSVD,
@@ -133,11 +133,11 @@ typedef enum{
 
 
 /**A buffer for a single color plane in an uncompressed image.
 /**A buffer for a single color plane in an uncompressed image.
  * This contains the image data in a left-to-right, top-down format.
  * This contains the image data in a left-to-right, top-down format.
- * Each row of pixels is stored contiguously in memory, but successive rows
- *  need not be.
+ * Each row of pixels is stored contiguously in memory, but successive
+ *  rows need not be.
  * Use \a stride to compute the offset of the next row.
  * Use \a stride to compute the offset of the next row.
- * The encoder accepts both positive \a stride values (top-down in memory) and
- *  negative (bottom-up in memory).
+ * The encoder accepts both positive \a stride values (top-down in memory)
+ *  and negative (bottom-up in memory).
  * The decoder currently always generates images with positive strides.*/
  * The decoder currently always generates images with positive strides.*/
 typedef struct{
 typedef struct{
   /**The width of this plane.*/
   /**The width of this plane.*/
@@ -151,18 +151,18 @@ typedef struct{
 }th_img_plane;
 }th_img_plane;
 
 
 /**A complete image buffer for an uncompressed frame.
 /**A complete image buffer for an uncompressed frame.
- * The chroma planes may be decimated by a factor of two in either direction,
- *  as indicated by th_info#pixel_fmt.
+ * The chroma planes may be decimated by a factor of two in either
+ *  direction, as indicated by th_info#pixel_fmt.
  * The width and height of the Y' plane must be multiples of 16.
  * The width and height of the Y' plane must be multiples of 16.
- * They may need to be cropped for display, using the rectangle specified by
- *  th_info#pic_x, th_info#pic_y, th_info#pic_width, and
- *  th_info#pic_height.
+ * They may need to be cropped for display, using the rectangle
+ *  specified by th_info#pic_x, th_info#pic_y, th_info#pic_width,
+ *  and th_info#pic_height.
  * All samples are 8 bits.
  * All samples are 8 bits.
  * \note The term YUV often used to describe a colorspace is ambiguous.
  * \note The term YUV often used to describe a colorspace is ambiguous.
- * The exact parameters of the RGB to YUV conversion process aside, in many
- *  contexts the U and V channels actually have opposite meanings.
- * To avoid this confusion, we are explicit: the name of the color channels are
- *  Y'CbCr, and they appear in that order, always.
+ * The exact parameters of the RGB to YUV conversion process aside, in
+ *  many contexts the U and V channels actually have opposite meanings.
+ * To avoid this confusion, we are explicit: the names of the color
+ *  channels are Y'CbCr, and they appear in that order, always.
  * The prime symbol denotes that the Y channel is non-linear.
  * The prime symbol denotes that the Y channel is non-linear.
  * Cb and Cr stand for "Chroma blue" and "Chroma red", respectively.*/
  * Cb and Cr stand for "Chroma blue" and "Chroma red", respectively.*/
 typedef th_img_plane th_ycbcr_buffer[3];
 typedef th_img_plane th_ycbcr_buffer[3];
@@ -192,7 +192,7 @@ typedef th_img_plane th_ycbcr_buffer[3];
  *
  *
  * It is also generally recommended that the offsets and sizes should still be
  * It is also generally recommended that the offsets and sizes should still be
  *  multiples of 2 to avoid chroma sampling shifts when chroma is sub-sampled.
  *  multiples of 2 to avoid chroma sampling shifts when chroma is sub-sampled.
- * See <a href="http://www.theora.org/doc/Theora_I_spec.pdf">the Theora
+ * See <a href="http://www.theora.org/doc/Theora.pdf">the Theora
  *  specification</a>, Section 4.4, for more details.
  *  specification</a>, Section 4.4, for more details.
  *
  *
  * Frame rate, in frames per second, is stored as a rational fraction, as is
  * Frame rate, in frames per second, is stored as a rational fraction, as is
@@ -230,8 +230,8 @@ typedef struct{
    *  #frame_height-#pic_height-#pic_y must be no larger than 255.
    *  #frame_height-#pic_height-#pic_y must be no larger than 255.
    * This slightly funny restriction is due to the fact that the offset is
    * This slightly funny restriction is due to the fact that the offset is
    *  specified from the top of the image for consistency with the standard
    *  specified from the top of the image for consistency with the standard
-   *  graphics left-handed coordinate system used throughout this API, while it
-   *  is stored in the encoded stream as an offset from the bottom.*/
+   *  graphics left-handed coordinate system used throughout this API, while
+   *  it is stored in the encoded stream as an offset from the bottom.*/
   ogg_uint32_t  pic_y;
   ogg_uint32_t  pic_y;
   /**\name Frame rate
   /**\name Frame rate
    * The frame rate, as a fraction.
    * The frame rate, as a fraction.
@@ -259,9 +259,6 @@ typedef struct{
   /**The target bit-rate in bits per second.
   /**The target bit-rate in bits per second.
      If initializing an encoder with this struct, set this field to a non-zero
      If initializing an encoder with this struct, set this field to a non-zero
       value to activate CBR encoding by default.*/
       value to activate CBR encoding by default.*/
-  /*TODO: Current encoder does not support CBR mode, or anything like it.
-    We also don't really know what nominal rate each quality level
-     corresponds to yet.*/
   int           target_bitrate;
   int           target_bitrate;
   /**The target quality level.
   /**The target quality level.
      Valid values range from 0 to 63, inclusive, with higher values giving
      Valid values range from 0 to 63, inclusive, with higher values giving
@@ -314,7 +311,7 @@ typedef struct{
  * A particular tag may occur more than once, and order is significant.
  * A particular tag may occur more than once, and order is significant.
  * The character set encoding for the strings is always UTF-8, but the tag
  * The character set encoding for the strings is always UTF-8, but the tag
  *  names are limited to ASCII, and treated as case-insensitive.
  *  names are limited to ASCII, and treated as case-insensitive.
- * See <a href="http://www.theora.org/doc/Theora_I_spec.pdf">the Theora
+ * See <a href="http://www.theora.org/doc/Theora.pdf">the Theora
  *  specification</a>, Section 6.3.3 for details.
  *  specification</a>, Section 6.3.3 for details.
  *
  *
  * In filling in this structure, th_decode_headerin() will null-terminate
  * In filling in this structure, th_decode_headerin() will null-terminate

+ 47 - 57
Engine/lib/libtheora/include/theora/theora.h

@@ -5,7 +5,7 @@
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  *                                                                  *
  *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
  * by the Xiph.Org Foundation http://www.xiph.org/                  *
  * by the Xiph.Org Foundation http://www.xiph.org/                  *
  *                                                                  *
  *                                                                  *
  ********************************************************************
  ********************************************************************
@@ -27,11 +27,11 @@ extern "C"
 
 
 #include <ogg/ogg.h>
 #include <ogg/ogg.h>
 
 
-/** \defgroup oldfuncs Legacy pre-1.0 C API */
-/*  @{ */
-
-/** \mainpage
- * 
+/** \file
+ * The libtheora pre-1.0 legacy C API.
+ *
+ * \ingroup oldfuncs
+ *
  * \section intro Introduction
  * \section intro Introduction
  *
  *
  * This is the documentation for the libtheora legacy C API, declared in 
  * This is the documentation for the libtheora legacy C API, declared in 
@@ -42,7 +42,7 @@ extern "C"
  *
  *
  * libtheora is the reference implementation for
  * libtheora is the reference implementation for
  * <a href="http://www.theora.org/">Theora</a>, a free video codec.
  * <a href="http://www.theora.org/">Theora</a>, a free video codec.
- * Theora is derived from On2's VP3 codec with improved integration for
+ * Theora is derived from On2's VP3 codec with improved integration with
  * Ogg multimedia formats by <a href="http://www.xiph.org/">Xiph.Org</a>.
  * Ogg multimedia formats by <a href="http://www.xiph.org/">Xiph.Org</a>.
  * 
  * 
  * \section overview Overview
  * \section overview Overview
@@ -114,21 +114,11 @@ extern "C"
  * checking beyond whether a header bit is present.  Instead, use the
  * checking beyond whether a header bit is present.  Instead, use the
  * theora_decode_header() function and check the return value; or examine the
  * theora_decode_header() function and check the return value; or examine the
  * header bytes at the beginning of the Ogg page.
  * header bytes at the beginning of the Ogg page.
- *
- * \subsection example Example Decoder 
- *
- * See <a href="http://svn.xiph.org/trunk/theora/examples/dump_video.c">
- * examples/dump_video.c</a> for a simple decoder implementation.
- *
- * \section encoding Encoding Process
- *
- * See <a href="http://svn.xiph.org/trunk/theora/examples/encoder_example.c">
- * examples/encoder_example.c</a> for a simple encoder implementation.
  */
  */
 
 
-/** \file
- * The libtheora pre-1.0 legacy C API.
- */
+
+/** \defgroup oldfuncs Legacy pre-1.0 C API */
+/*  @{ */
 
 
 /**
 /**
  * A YUV buffer for passing uncompressed frames to and from the codec.
  * A YUV buffer for passing uncompressed frames to and from the codec.
@@ -292,14 +282,21 @@ typedef struct theora_comment{
 
 
 
 
 /**\name theora_control() codes */
 /**\name theora_control() codes */
-
-/**\anchor decctlcodes
+/**\anchor decctlcodes_old
  * These are the available request codes for theora_control()
  * These are the available request codes for theora_control()
  * when called with a decoder instance.
  * when called with a decoder instance.
- * By convention, these are odd, to distinguish them from the
- *  \ref encctlcodes "encoder control codes".
+ * By convention decoder control codes are odd, to distinguish 
+ * them from \ref encctlcodes_old "encoder control codes" which
+ * are even.
+ *
+ * Note that since the 1.0 release, both the legacy and the final
+ * implementation accept all the same control codes, but only the
+ * final API declares the newer codes.
+ *
  * Keep any experimental or vendor-specific values above \c 0x8000.*/
  * Keep any experimental or vendor-specific values above \c 0x8000.*/
 
 
+/*@{*/
+
 /**Get the maximum post-processing level.
 /**Get the maximum post-processing level.
  * The decoder supports a post-processing filter that can improve
  * The decoder supports a post-processing filter that can improve
  * the appearance of the decoded images. This returns the highest
  * the appearance of the decoded images. This returns the highest
@@ -324,9 +321,9 @@ typedef struct theora_comment{
  * \param[in]  buf <tt>ogg_uint32_t</tt>: The maximum distance between key
  * \param[in]  buf <tt>ogg_uint32_t</tt>: The maximum distance between key
  *                   frames.
  *                   frames.
  * \param[out] buf <tt>ogg_uint32_t</tt>: The actual maximum distance set.
  * \param[out] buf <tt>ogg_uint32_t</tt>: The actual maximum distance set.
- * \retval TH_FAULT  \a theora_state or \a buf is <tt>NULL</tt>.
- * \retval TH_EINVAL \a buf_sz is not <tt>sizeof(ogg_uint32_t)</tt>.
- * \retval TH_IMPL   Not supported by this implementation.*/
+ * \retval OC_FAULT  \a theora_state or \a buf is <tt>NULL</tt>.
+ * \retval OC_EINVAL \a buf_sz is not <tt>sizeof(ogg_uint32_t)</tt>.
+ * \retval OC_IMPL   Not supported by this implementation.*/
 #define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4)
 #define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4)
 
 
 /**Set the granule position.
 /**Set the granule position.
@@ -338,33 +335,23 @@ typedef struct theora_comment{
  */
  */
 #define TH_DECCTL_SET_GRANPOS (5)
 #define TH_DECCTL_SET_GRANPOS (5)
 
 
+/**\anchor encctlcodes_old */
 
 
-/**\anchor encctlcodes
- * These are the available request codes for theora_control()
- * when called with an encoder instance.
- * By convention, these are even, to distinguish them from the
- *  \ref decctlcodes "decoder control codes".
- * Keep any experimental or vendor-specific values above \c 0x8000.*/
-/*@{*/
 /**Sets the quantization parameters to use.
 /**Sets the quantization parameters to use.
  * The parameters are copied, not stored by reference, so they can be freed
  * The parameters are copied, not stored by reference, so they can be freed
  *  after this call.
  *  after this call.
  * <tt>NULL</tt> may be specified to revert to the default parameters.
  * <tt>NULL</tt> may be specified to revert to the default parameters.
- * For the current encoder, <tt>scale[ci!=0][qi]</tt> must be no greater than
- *  <tt>scale[ci!=0][qi-1]</tt> and <tt>base[qti][pli][qi][ci]</tt> must be no
- *  greater than <tt>base[qti][pli][qi-1][ci]</tt>.
- * These two conditions ensure that the actual quantizer for a given \a qti,
- *  \a pli, and \a ci does not increase as \a qi increases.
  *
  *
  * \param[in] buf #th_quant_info
  * \param[in] buf #th_quant_info
- * \retval TH_FAULT  \a theora_state is <tt>NULL</tt>.
- * \retval TH_EINVAL Encoding has already begun, the quantization parameters
- *                    do not meet one of the above stated conditions, \a buf
- *                    is <tt>NULL</tt> and \a buf_sz is not zero, or \a buf
- *                    is non-<tt>NULL</tt> and \a buf_sz is not
- *                    <tt>sizeof(#th_quant_info)</tt>.
- * \retval TH_IMPL   Not supported by this implementation.*/
+ * \retval OC_FAULT  \a theora_state is <tt>NULL</tt>.
+ * \retval OC_EINVAL Encoding has already begun, the quantization parameters
+ *                    are not acceptable to this version of the encoder, 
+ *                    \a buf is <tt>NULL</tt> and \a buf_sz is not zero, 
+ *                    or \a buf is non-<tt>NULL</tt> and \a buf_sz is 
+ *                    not <tt>sizeof(#th_quant_info)</tt>.
+ * \retval OC_IMPL   Not supported by this implementation.*/
 #define TH_ENCCTL_SET_QUANT_PARAMS (2)
 #define TH_ENCCTL_SET_QUANT_PARAMS (2)
+
 /**Disables any encoder features that would prevent lossless transcoding back
 /**Disables any encoder features that would prevent lossless transcoding back
  *  to VP3.
  *  to VP3.
  * This primarily means disabling block-level QI values and not using 4MV mode
  * This primarily means disabling block-level QI values and not using 4MV mode
@@ -389,10 +376,11 @@ typedef struct theora_comment{
  *                   4:2:0, the picture region is smaller than the full frame,
  *                   4:2:0, the picture region is smaller than the full frame,
  *                   or if encoding has begun, preventing the quantization
  *                   or if encoding has begun, preventing the quantization
  *                   tables and codebooks from being set.
  *                   tables and codebooks from being set.
- * \retval TH_FAULT  \a theora_state or \a buf is <tt>NULL</tt>.
- * \retval TH_EINVAL \a buf_sz is not <tt>sizeof(int)</tt>.
- * \retval TH_IMPL   Not supported by this implementation.*/
+ * \retval OC_FAULT  \a theora_state or \a buf is <tt>NULL</tt>.
+ * \retval OC_EINVAL \a buf_sz is not <tt>sizeof(int)</tt>.
+ * \retval OC_IMPL   Not supported by this implementation.*/
 #define TH_ENCCTL_SET_VP3_COMPATIBLE (10)
 #define TH_ENCCTL_SET_VP3_COMPATIBLE (10)
+
 /**Gets the maximum speed level.
 /**Gets the maximum speed level.
  * Higher speed levels favor quicker encoding over better quality per bit.
  * Higher speed levels favor quicker encoding over better quality per bit.
  * Depending on the encoding mode, and the internal algorithms used, quality
  * Depending on the encoding mode, and the internal algorithms used, quality
@@ -402,25 +390,27 @@ typedef struct theora_comment{
  *  the current encoding mode (VBR vs. CQI, etc.).
  *  the current encoding mode (VBR vs. CQI, etc.).
  *
  *
  * \param[out] buf int: The maximum encoding speed level.
  * \param[out] buf int: The maximum encoding speed level.
- * \retval TH_FAULT  \a theora_state or \a buf is <tt>NULL</tt>.
- * \retval TH_EINVAL \a buf_sz is not <tt>sizeof(int)</tt>.
- * \retval TH_IMPL   Not supported by this implementation in the current
+ * \retval OC_FAULT  \a theora_state or \a buf is <tt>NULL</tt>.
+ * \retval OC_EINVAL \a buf_sz is not <tt>sizeof(int)</tt>.
+ * \retval OC_IMPL   Not supported by this implementation in the current
  *                    encoding mode.*/
  *                    encoding mode.*/
 #define TH_ENCCTL_GET_SPLEVEL_MAX (12)
 #define TH_ENCCTL_GET_SPLEVEL_MAX (12)
+
 /**Sets the speed level.
 /**Sets the speed level.
  * By default a speed value of 1 is used.
  * By default a speed value of 1 is used.
  *
  *
  * \param[in] buf int: The new encoding speed level.
  * \param[in] buf int: The new encoding speed level.
  *                      0 is slowest, larger values use less CPU.
  *                      0 is slowest, larger values use less CPU.
- * \retval TH_FAULT  \a theora_state or \a buf is <tt>NULL</tt>.
- * \retval TH_EINVAL \a buf_sz is not <tt>sizeof(int)</tt>, or the
+ * \retval OC_FAULT  \a theora_state or \a buf is <tt>NULL</tt>.
+ * \retval OC_EINVAL \a buf_sz is not <tt>sizeof(int)</tt>, or the
  *                    encoding speed level is out of bounds.
  *                    encoding speed level is out of bounds.
  *                   The maximum encoding speed level may be
  *                   The maximum encoding speed level may be
  *                    implementation- and encoding mode-specific, and can be
  *                    implementation- and encoding mode-specific, and can be
  *                    obtained via #TH_ENCCTL_GET_SPLEVEL_MAX.
  *                    obtained via #TH_ENCCTL_GET_SPLEVEL_MAX.
- * \retval TH_IMPL   Not supported by this implementation in the current
+ * \retval OC_IMPL   Not supported by this implementation in the current
  *                    encoding mode.*/
  *                    encoding mode.*/
 #define TH_ENCCTL_SET_SPLEVEL (14)
 #define TH_ENCCTL_SET_SPLEVEL (14)
+
 /*@}*/
 /*@}*/
 
 
 #define OC_FAULT       -1       /**< General failure */
 #define OC_FAULT       -1       /**< General failure */
@@ -779,8 +769,8 @@ extern void  theora_comment_clear(theora_comment *tc);
  * This is used to provide advanced control the encoding process.
  * This is used to provide advanced control the encoding process.
  * \param th     A #theora_state handle.
  * \param th     A #theora_state handle.
  * \param req    The control code to process.
  * \param req    The control code to process.
- *                See \ref encctlcodes "the list of available control codes"
- *                 for details.
+ *                See \ref encctlcodes_old "the list of available
+ *                 control codes" for details.
  * \param buf    The parameters for this control code.
  * \param buf    The parameters for this control code.
  * \param buf_sz The size of the parameter buffer.*/
  * \param buf_sz The size of the parameter buffer.*/
 extern int theora_control(theora_state *th,int req,void *buf,size_t buf_sz);
 extern int theora_control(theora_state *th,int req,void *buf,size_t buf_sz);

+ 19 - 1
Engine/lib/libtheora/include/theora/theoradec.h

@@ -5,7 +5,7 @@
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  *                                                                  *
  *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
  * by the Xiph.Org Foundation http://www.xiph.org/                  *
  * by the Xiph.Org Foundation http://www.xiph.org/                  *
  *                                                                  *
  *                                                                  *
  ********************************************************************
  ********************************************************************
@@ -38,6 +38,10 @@ extern "C" {
  * Keep any experimental or vendor-specific values above \c 0x8000.*/
  * Keep any experimental or vendor-specific values above \c 0x8000.*/
 /*@{*/
 /*@{*/
 /**Gets the maximum post-processing level.
 /**Gets the maximum post-processing level.
+ * The decoder supports a post-processing filter that can improve
+ * the appearance of the decoded images. This returns the highest
+ * level setting for this post-processor, corresponding to maximum
+ * improvement and computational expense.
  *
  *
  * \param[out] _buf int: The maximum post-processing level.
  * \param[out] _buf int: The maximum post-processing level.
  * \retval TH_EFAULT  \a _dec_ctx or \a _buf is <tt>NULL</tt>.
  * \retval TH_EFAULT  \a _dec_ctx or \a _buf is <tt>NULL</tt>.
@@ -47,6 +51,10 @@ extern "C" {
 /**Sets the post-processing level.
 /**Sets the post-processing level.
  * By default, post-processing is disabled.
  * By default, post-processing is disabled.
  *
  *
+ * Sets the level of post-processing to use when decoding the
+ * compressed stream. This must be a value between zero (off)
+ * and the maximum returned by TH_DECCTL_GET_PPLEVEL_MAX.
+ *
  * \param[in] _buf int: The new post-processing level.
  * \param[in] _buf int: The new post-processing level.
  *                      0 to disable; larger values use more CPU.
  *                      0 to disable; larger values use more CPU.
  * \retval TH_EFAULT  \a _dec_ctx or \a _buf is <tt>NULL</tt>.
  * \retval TH_EFAULT  \a _dec_ctx or \a _buf is <tt>NULL</tt>.
@@ -83,6 +91,15 @@ extern "C" {
  * \retval TH_EINVAL  \a _buf_sz is not
  * \retval TH_EINVAL  \a _buf_sz is not
  *                     <tt>sizeof(th_stripe_callback)</tt>.*/
  *                     <tt>sizeof(th_stripe_callback)</tt>.*/
 #define TH_DECCTL_SET_STRIPE_CB (7)
 #define TH_DECCTL_SET_STRIPE_CB (7)
+
+/**Enables telemetry and sets the macroblock display mode */
+#define TH_DECCTL_SET_TELEMETRY_MBMODE (9)
+/**Enables telemetry and sets the motion vector display mode */
+#define TH_DECCTL_SET_TELEMETRY_MV (11)
+/**Enables telemetry and sets the adaptive quantization display mode */
+#define TH_DECCTL_SET_TELEMETRY_QI (13)
+/**Enables telemetry and sets the bitstream breakdown visualization mode */
+#define TH_DECCTL_SET_TELEMETRY_BITS (15)
 /*@}*/
 /*@}*/
 
 
 
 
@@ -289,6 +306,7 @@ extern int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
  *               It may be freed or overwritten without notification when
  *               It may be freed or overwritten without notification when
  *                subsequent frames are decoded.
  *                subsequent frames are decoded.
  * \retval 0 Success
  * \retval 0 Success
+ * \retval TH_EFAULT     \a _dec or \a _ycbcr was <tt>NULL</tt>.
  */
  */
 extern int th_decode_ycbcr_out(th_dec_ctx *_dec,
 extern int th_decode_ycbcr_out(th_dec_ctx *_dec,
  th_ycbcr_buffer _ycbcr);
  th_ycbcr_buffer _ycbcr);

+ 244 - 24
Engine/lib/libtheora/include/theora/theoraenc.h

@@ -5,7 +5,7 @@
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  *                                                                  *
  *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003                *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
  * by the Xiph.Org Foundation http://www.xiph.org/                  *
  * by the Xiph.Org Foundation http://www.xiph.org/                  *
  *                                                                  *
  *                                                                  *
  ********************************************************************
  ********************************************************************
@@ -49,26 +49,20 @@ extern "C" {
  *                     <tt>NULL</tt> and \a _buf_sz is not zero, or \a _buf is
  *                     <tt>NULL</tt> and \a _buf_sz is not zero, or \a _buf is
  *                     non-<tt>NULL</tt> and \a _buf_sz is not
  *                     non-<tt>NULL</tt> and \a _buf_sz is not
  *                     <tt>sizeof(#th_huff_code)*#TH_NHUFFMAN_TABLES*#TH_NDCT_TOKENS</tt>.
  *                     <tt>sizeof(#th_huff_code)*#TH_NHUFFMAN_TABLES*#TH_NDCT_TOKENS</tt>.
- * \retval TH_IMPL   Not supported by this implementation.*/
+ * \retval TH_EIMPL   Not supported by this implementation.*/
 #define TH_ENCCTL_SET_HUFFMAN_CODES (0)
 #define TH_ENCCTL_SET_HUFFMAN_CODES (0)
 /**Sets the quantization parameters to use.
 /**Sets the quantization parameters to use.
  * The parameters are copied, not stored by reference, so they can be freed
  * The parameters are copied, not stored by reference, so they can be freed
  *  after this call.
  *  after this call.
  * <tt>NULL</tt> may be specified to revert to the default parameters.
  * <tt>NULL</tt> may be specified to revert to the default parameters.
- * For the current encoder, <tt>scale[ci!=0][qi]</tt> must be no greater than
- *  <tt>scale[ci!=0][qi-1]</tt> and <tt>base[qti][pli][qi][ci]</tt> must be no
- *  greater than <tt>base[qti][pli][qi-1][ci]</tt>.
- * These two conditions ensure that the actual quantizer for a given \a qti,
- *  \a pli, and \a ci does not increase as \a qi increases.
  *
  *
  * \param[in] _buf #th_quant_info
  * \param[in] _buf #th_quant_info
  * \retval TH_EFAULT \a _enc_ctx is <tt>NULL</tt>.
  * \retval TH_EFAULT \a _enc_ctx is <tt>NULL</tt>.
- * \retval TH_EINVAL Encoding has already begun, the quantization parameters
- *                    do not meet one of the above stated conditions, \a _buf
- *                    is <tt>NULL</tt> and \a _buf_sz is not zero, or \a _buf
- *                    is non-<tt>NULL</tt> and \a _buf_sz is not
- *                    <tt>sizeof(#th_quant_info)</tt>.
- * \retval TH_IMPL   Not supported by this implementation.*/
+ * \retval TH_EINVAL Encoding has already begun, \a _buf is 
+ *                    <tt>NULL</tt> and \a _buf_sz is not zero,
+ *                    or \a _buf is non-<tt>NULL</tt> and
+ *                    \a _buf_sz is not <tt>sizeof(#th_quant_info)</tt>.
+ * \retval TH_EIMPL   Not supported by this implementation.*/
 #define TH_ENCCTL_SET_QUANT_PARAMS (2)
 #define TH_ENCCTL_SET_QUANT_PARAMS (2)
 /**Sets the maximum distance between key frames.
 /**Sets the maximum distance between key frames.
  * This can be changed during an encode, but will be bounded by
  * This can be changed during an encode, but will be bounded by
@@ -81,12 +75,12 @@ extern "C" {
  * \param[out] _buf <tt>ogg_uint32_t</tt>: The actual maximum distance set.
  * \param[out] _buf <tt>ogg_uint32_t</tt>: The actual maximum distance set.
  * \retval TH_EFAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
  * \retval TH_EFAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
  * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(ogg_uint32_t)</tt>.
  * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(ogg_uint32_t)</tt>.
- * \retval TH_IMPL   Not supported by this implementation.*/
+ * \retval TH_EIMPL   Not supported by this implementation.*/
 #define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4)
 #define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4)
 /**Disables any encoder features that would prevent lossless transcoding back
 /**Disables any encoder features that would prevent lossless transcoding back
  *  to VP3.
  *  to VP3.
- * This primarily means disabling block-level QI values and not using 4MV mode
- *  when any of the luma blocks in a macro block are not coded.
+ * This primarily means disabling block-adaptive quantization and always coding
+ *  all four luma blocks in a macro block when 4MV is used.
  * It also includes using the VP3 quantization tables and Huffman codes; if you
  * It also includes using the VP3 quantization tables and Huffman codes; if you
  *  set them explicitly after calling this function, the resulting stream will
  *  set them explicitly after calling this function, the resulting stream will
  *  not be VP3-compatible.
  *  not be VP3-compatible.
@@ -109,7 +103,7 @@ extern "C" {
  *                   tables and codebooks from being set.
  *                   tables and codebooks from being set.
  * \retval TH_EFAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
  * \retval TH_EFAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
  * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>.
  * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>.
- * \retval TH_IMPL   Not supported by this implementation.*/
+ * \retval TH_EIMPL   Not supported by this implementation.*/
 #define TH_ENCCTL_SET_VP3_COMPATIBLE (10)
 #define TH_ENCCTL_SET_VP3_COMPATIBLE (10)
 /**Gets the maximum speed level.
 /**Gets the maximum speed level.
  * Higher speed levels favor quicker encoding over better quality per bit.
  * Higher speed levels favor quicker encoding over better quality per bit.
@@ -117,28 +111,254 @@ extern "C" {
  *  may actually improve, but in this case bitrate will also likely increase.
  *  may actually improve, but in this case bitrate will also likely increase.
  * In any case, overall rate/distortion performance will probably decrease.
  * In any case, overall rate/distortion performance will probably decrease.
  * The maximum value, and the meaning of each value, may change depending on
  * The maximum value, and the meaning of each value, may change depending on
- *  the current encoding mode (VBR vs. CQI, etc.).
+ *  the current encoding mode (VBR vs. constant quality, etc.).
  *
  *
- * \param[out] _buf int: The maximum encoding speed level.
+ * \param[out] _buf <tt>int</tt>: The maximum encoding speed level.
  * \retval TH_EFAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
  * \retval TH_EFAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
  * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>.
  * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>.
- * \retval TH_IMPL   Not supported by this implementation in the current
+ * \retval TH_EIMPL   Not supported by this implementation in the current
  *                    encoding mode.*/
  *                    encoding mode.*/
 #define TH_ENCCTL_GET_SPLEVEL_MAX (12)
 #define TH_ENCCTL_GET_SPLEVEL_MAX (12)
 /**Sets the speed level.
 /**Sets the speed level.
- * By default, the slowest speed (0) is used.
+ * The current speed level may be retrieved using #TH_ENCCTL_GET_SPLEVEL.
  *
  *
- * \param[in] _buf int: The new encoding speed level.
- *                      0 is slowest, larger values use less CPU.
+ * \param[in] _buf <tt>int</tt>: The new encoding speed level.
+ *                 0 is slowest, larger values use less CPU.
  * \retval TH_EFAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
  * \retval TH_EFAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
  * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>, or the
  * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>, or the
  *                    encoding speed level is out of bounds.
  *                    encoding speed level is out of bounds.
  *                   The maximum encoding speed level may be
  *                   The maximum encoding speed level may be
  *                    implementation- and encoding mode-specific, and can be
  *                    implementation- and encoding mode-specific, and can be
  *                    obtained via #TH_ENCCTL_GET_SPLEVEL_MAX.
  *                    obtained via #TH_ENCCTL_GET_SPLEVEL_MAX.
- * \retval TH_IMPL   Not supported by this implementation in the current
+ * \retval TH_EIMPL   Not supported by this implementation in the current
  *                    encoding mode.*/
  *                    encoding mode.*/
 #define TH_ENCCTL_SET_SPLEVEL (14)
 #define TH_ENCCTL_SET_SPLEVEL (14)
+/**Gets the current speed level.
+ * The default speed level may vary according to encoder implementation, but if
+ *  this control code is not supported (it returns #TH_EIMPL), the default may
+ *  be assumed to be the slowest available speed (0).
+ * The maximum encoding speed level may be implementation- and encoding
+ *  mode-specific, and can be obtained via #TH_ENCCTL_GET_SPLEVEL_MAX.
+ *
+ * \param[out] _buf <tt>int</tt>: The current encoding speed level.
+ *                  0 is slowest, larger values use less CPU.
+ * \retval TH_EFAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>.
+ * \retval TH_EIMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define TH_ENCCTL_GET_SPLEVEL (16)
+/**Sets the number of duplicates of the next frame to produce.
+ * Although libtheora can encode duplicate frames very cheaply, it costs some
+ *  amount of CPU to detect them, and a run of duplicates cannot span a
+ *  keyframe boundary.
+ * This control code tells the encoder to produce the specified number of extra
+ *  duplicates of the next frame.
+ * This allows the encoder to make smarter keyframe placement decisions and
+ *  rate control decisions, and reduces CPU usage as well, when compared to
+ *  just submitting the same frame for encoding multiple times.
+ * This setting only applies to the next frame submitted for encoding.
+ * You MUST call th_encode_packetout() repeatedly until it returns 0, or the
+ *  extra duplicate frames will be lost.
+ *
+ * \param[in] _buf <tt>int</tt>: The number of duplicates to produce.
+ *                 If this is negative or zero, no duplicates will be produced.
+ * \retval TH_EFAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>, or the
+ *                    number of duplicates is greater than or equal to the
+ *                    maximum keyframe interval.
+ *                   In the latter case, NO duplicate frames will be produced.
+ *                   You must ensure that the maximum keyframe interval is set
+ *                    larger than the maximum number of duplicates you will
+ *                    ever wish to insert prior to encoding.
+ * \retval TH_EIMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define TH_ENCCTL_SET_DUP_COUNT (18)
+/**Modifies the default bitrate management behavior.
+ * Use to allow or disallow frame dropping, and to enable or disable capping
+ *  bit reservoir overflows and underflows.
+ * See \ref ratectlflags "the list of available flags".
+ * The flags are set by default to
+ *  <tt>#TH_RATECTL_DROP_FRAMES|#TH_RATECTL_CAP_OVERFLOW</tt>.
+ *
+ * \param[in] _buf <tt>int</tt>: Any combination of
+ *                  \ref ratectlflags "the available flags":
+ *                 - #TH_RATECTL_DROP_FRAMES: Enable frame dropping.
+ *                 - #TH_RATECTL_CAP_OVERFLOW: Don't bank excess bits for later
+ *                    use.
+ *                 - #TH_RATECTL_CAP_UNDERFLOW: Don't try to make up shortfalls
+ *                    later.
+ * \retval TH_EFAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt> or rate control
+ *                    is not enabled.
+ * \retval TH_EIMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define TH_ENCCTL_SET_RATE_FLAGS (20)
+/**Sets the size of the bitrate management bit reservoir as a function
+ *  of number of frames.
+ * The reservoir size affects how quickly bitrate management reacts to
+ *  instantaneous changes in the video complexity.
+ * Larger reservoirs react more slowly, and provide better overall quality, but
+ *  require more buffering by a client, adding more latency to live streams.
+ * By default, libtheora sets the reservoir to the maximum distance between
+ *  keyframes, subject to a minimum and maximum limit.
+ * This call may be used to increase or decrease the reservoir, increasing or
+ *  decreasing the allowed temporary variance in bitrate.
+ * An implementation may impose some limits on the size of a reservoir it can
+ *  handle, in which case the actual reservoir size may not be exactly what was
+ *  requested.
+ * The actual value set will be returned.
+ *
+ * \param[in]  _buf <tt>int</tt>: Requested size of the reservoir measured in
+ *                   frames.
+ * \param[out] _buf <tt>int</tt>: The actual size of the reservoir set.
+ * \retval TH_EFAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>, or rate control
+ *                    is not enabled.  The buffer has an implementation
+ *                    defined minimum and maximum size and the value in _buf
+ *                    will be adjusted to match the actual value set.
+ * \retval TH_EIMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define TH_ENCCTL_SET_RATE_BUFFER (22)
+/**Enable pass 1 of two-pass encoding mode and retrieve the first pass metrics.
+ * Pass 1 mode must be enabled before the first frame is encoded, and a target
+ *  bitrate must have already been specified to the encoder.
+ * Although this does not have to be the exact rate that will be used in the
+ *  second pass, closer values may produce better results.
+ * The first call returns the size of the two-pass header data, along with some
+ *  placeholder content.
+ * This first call also sets the encoder to pass 1 mode implicitly.
+ * Then, a subsequent call must be made after each call to
+ *  th_encode_ycbcr_in() to retrieve the metrics for that frame.
+ * An additional, final call must be made to retrieve the summary data,
+ *  containing such information as the total number of frames, etc.
+ * This must be stored in place of the placeholder data that was returned
+ *  in the first call, before the frame metrics data.
+ * All of this data must be presented back to the encoder during pass 2 using
+ *  #TH_ENCCTL_2PASS_IN.
+ *
+ * \param[out] _buf <tt>char *</tt>: Returns a pointer to internal storage
+ *              containing the two pass metrics data.
+ *             This storage is only valid until the next call, or until the
+ *              encoder context is freed, and must be copied by the
+ *              application.
+ * \retval >=0       The number of bytes of metric data available in the
+ *                    returned buffer.
+ * \retval TH_EFAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(char *)</tt>, no target
+ *                    bitrate has been set, or the first call was made after
+ *                    the first frame was submitted for encoding.
+ * \retval TH_EIMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_2PASS_OUT (24)
+/**Submits two-pass encoding metric data collected during the first encoding
+ *  pass to the second pass.
+ * The first call must be made before the first frame is encoded, and a target
+ *  bitrate must have already been specified to the encoder.
+ * It sets the encoder to pass 2 mode implicitly; this cannot be disabled.
+ * The encoder may require reading data from some or all of the frames in
+ *  advance, depending on, e.g., the reservoir size used in the second pass.
+ * You must call this function repeatedly before each frame to provide data
+ *  until either a) it fails to consume all of the data presented or b) all of
+ *  the pass 1 data has been consumed.
+ * In the first case, you must save the remaining data to be presented after
+ *  the next frame.
+ * You can call this function with a NULL argument to get an upper bound on
+ *  the number of bytes that will be required before the next frame.
+ *
+ * When pass 2 is first enabled, the default bit reservoir is set to the entire
+ *  file; this gives maximum flexibility but can lead to very high peak rates.
+ * You can subsequently set it to another value with #TH_ENCCTL_SET_RATE_BUFFER
+ *  (e.g., to set it to the keyframe interval for non-live streaming), however,
+ *  you may then need to provide more data before the next frame.
+ *
+ * \param[in] _buf <tt>char[]</tt>: A buffer containing the data returned by
+ *                  #TH_ENCCTL_2PASS_OUT in pass 1.
+ *                 You may pass <tt>NULL</tt> for \a _buf to return an upper
+ *                  bound on the number of additional bytes needed before the
+ *                  next frame.
+ *                 The summary data returned at the end of pass 1 must be at
+ *                  the head of the buffer on the first call with a
+ *                  non-<tt>NULL</tt> \a _buf, and the placeholder data
+ *                  returned at the start of pass 1 should be omitted.
+ *                 After each call you should advance this buffer by the number
+ *                  of bytes consumed.
+ * \retval >0            The number of bytes of metric data required/consumed.
+ * \retval 0             No more data is required before the next frame.
+ * \retval TH_EFAULT     \a _enc_ctx is <tt>NULL</tt>.
+ * \retval TH_EINVAL     No target bitrate has been set, or the first call was
+ *                        made after the first frame was submitted for
+ *                        encoding.
+ * \retval TH_ENOTFORMAT The data did not appear to be pass 1 from a compatible
+ *                        implementation of this library.
+ * \retval TH_EBADHEADER The data was invalid; this may be returned when
+ *                        attempting to read an aborted pass 1 file that still
+ *                        has the placeholder data in place of the summary
+ *                        data.
+ * \retval TH_EIMPL       Not supported by this implementation.*/
+#define TH_ENCCTL_2PASS_IN (26)
+/**Sets the current encoding quality.
+ * This is only valid so long as no bitrate has been specified, either through
+ *  the #th_info struct used to initialize the encoder or through
+ *  #TH_ENCCTL_SET_BITRATE (this restriction may be relaxed in a future
+ *  version).
+ * If it is set before the headers are emitted, the target quality encoded in
+ *  them will be updated.
+ *
+ * \param[in] _buf <tt>int</tt>: The new target quality, in the range 0...63,
+ *                  inclusive.
+ * \retval 0             Success.
+ * \retval TH_EFAULT     \a _enc_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL     A target bitrate has already been specified, or the
+ *                        quality index was not in the range 0...63.
+ * \retval TH_EIMPL       Not supported by this implementation.*/
+#define TH_ENCCTL_SET_QUALITY (28)
+/**Sets the current encoding bitrate.
+ * Once a bitrate is set, the encoder must use a rate-controlled mode for all
+ *  future frames (this restriction may be relaxed in a future version).
+ * If it is set before the headers are emitted, the target bitrate encoded in
+ *  them will be updated.
+ * Due to the buffer delay, the exact bitrate of each section of the encode is
+ *  not guaranteed.
+ * The encoder may have already used more bits than allowed for the frames it
+ *  has encoded, expecting to make them up in future frames, or it may have
+ *  used fewer, holding the excess in reserve.
+ * The exact transition between the two bitrates is not well-defined by this
+ *  API, but may be affected by flags set with #TH_ENCCTL_SET_RATE_FLAGS.
+ * After a number of frames equal to the buffer delay, one may expect further
+ *  output to average at the target bitrate.
+ *
+ * \param[in] _buf <tt>long</tt>: The new target bitrate, in bits per second.
+ * \retval 0             Success.
+ * \retval TH_EFAULT     \a _enc_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL     The target bitrate was not positive.
+ * \retval TH_EIMPL       Not supported by this implementation.*/
+#define TH_ENCCTL_SET_BITRATE (30)
+
+/*@}*/
+
+
+/**\name TH_ENCCTL_SET_RATE_FLAGS flags
+ * \anchor ratectlflags
+ * These are the flags available for use with #TH_ENCCTL_SET_RATE_FLAGS.*/
+/*@{*/
+/**Drop frames to keep within bitrate buffer constraints.
+ * This can have a severe impact on quality, but is the only way to ensure that
+ *  bitrate targets are met at low rates during sudden bursts of activity.*/
+#define TH_RATECTL_DROP_FRAMES   (0x1)
+/**Ignore bitrate buffer overflows.
+ * If the encoder uses so few bits that the reservoir of available bits
+ *  overflows, ignore the excess.
+ * The encoder will not try to use these extra bits in future frames.
+ * At high rates this may cause the result to be undersized, but allows a
+ *  client to play the stream using a finite buffer; it should normally be
+ *  enabled.*/
+#define TH_RATECTL_CAP_OVERFLOW  (0x2)
+/**Ignore bitrate buffer underflows.
+ * If the encoder uses so many bits that the reservoir of available bits
+ *  underflows, ignore the deficit.
+ * The encoder will not try to make up these extra bits in future frames.
+ * At low rates this may cause the result to be oversized; it should normally
+ *  be disabled.*/
+#define TH_RATECTL_CAP_UNDERFLOW (0x4)
 /*@}*/
 /*@}*/
 
 
 
 

+ 173 - 0
Engine/lib/libtheora/lib/Makefile.am

@@ -0,0 +1,173 @@
+INCLUDES = -I$(top_srcdir)/include
+AM_CFLAGS = $(OGG_CFLAGS) $(CAIRO_CFLAGS)
+
+EXTRA_DIST = \
+	cpu.c \
+	encoder_disabled.c \
+	x86/mmxencfrag.c \
+	x86/mmxfdct.c \
+	x86/sse2fdct.c \
+	x86/x86enc.c \
+	x86/x86enc.h \
+	x86/mmxfrag.c \
+	x86/mmxfrag.h \
+	x86/mmxidct.c \
+	x86/mmxloop.h \
+	x86/mmxstate.c \
+	x86/x86int.h \
+	x86/x86state.c \
+	x86_vc
+
+lib_LTLIBRARIES = libtheoradec.la libtheoraenc.la libtheora.la
+
+if THEORA_DISABLE_ENCODE
+encoder_uniq_sources = \
+	encoder_disabled.c
+
+encoder_sources = \
+	$(encoder_uniq_sources)
+else
+encoder_uniq_x86_sources = \
+	x86/mmxencfrag.c \
+	x86/mmxfdct.c \
+	x86/x86enc.c
+
+encoder_uniq_x86_64_sources = \
+	x86/sse2fdct.c
+
+encoder_shared_x86_sources = \
+	x86/mmxfrag.c \
+	x86/mmxidct.c \
+	x86/mmxstate.c \
+	x86/x86state.c
+
+encoder_shared_x86_64_sources =
+
+if CPU_x86_64
+encoder_uniq_arch_sources = \
+ $(encoder_uniq_x86_sources) \
+ $(encoder_uniq_x86_64_sources)
+encoder_shared_arch_sources = \
+ $(encoder_shared_x86_sources) \
+ $(encoder_shared_x86_64_sources)
+else
+if CPU_x86_32
+encoder_uniq_arch_sources = $(encoder_uniq_x86_sources)
+encoder_shared_arch_sources = $(encoder_shared_x86_sources)
+else
+encoder_uniq_arch_sources =
+encoder_shared_arch_sources =
+endif
+endif
+
+encoder_uniq_sources = \
+	analyze.c \
+	fdct.c \
+	encfrag.c \
+	encapiwrapper.c \
+	encinfo.c \
+	encode.c \
+	enquant.c \
+	huffenc.c \
+	mathops.c \
+	mcenc.c \
+	rate.c \
+	tokenize.c \
+	$(encoder_uniq_arch_sources)
+
+encoder_sources = \
+	apiwrapper.c \
+	fragment.c \
+	idct.c \
+	internal.c \
+	state.c \
+	quant.c \
+	$(encoder_shared_arch_sources) \
+	$(encoder_uniq_sources)
+
+endif
+
+decoder_x86_sources = \
+	x86/mmxidct.c \
+	x86/mmxfrag.c \
+	x86/mmxstate.c \
+	x86/x86state.c
+if CPU_x86_64
+decoder_arch_sources = $(decoder_x86_sources)
+else
+if CPU_x86_32
+decoder_arch_sources = $(decoder_x86_sources)
+else
+decoder_arch_sources =
+endif
+endif
+
+decoder_sources = \
+	apiwrapper.c \
+	bitpack.c \
+	decapiwrapper.c \
+	decinfo.c \
+	decode.c \
+	dequant.c \
+	fragment.c \
+	huffdec.c \
+	idct.c \
+	info.c \
+	internal.c \
+	quant.c \
+	state.c \
+	$(decoder_arch_sources)
+
+noinst_HEADERS = \
+	cpu.h \
+	internal.h \
+	encint.h \
+	enquant.h \
+	huffenc.h \
+	mathops.h \
+	modedec.h \
+	x86/x86enc.h \
+	apiwrapper.h \
+	bitpack.h \
+	dct.h \
+	decint.h \
+	dequant.h \
+	huffdec.h \
+	huffman.h \
+	ocintrin.h \
+	quant.h \
+	x86/mmxfrag.h \
+	x86/mmxloop.h \
+	x86/x86int.h
+
+libtheoradec_la_SOURCES = \
+	$(decoder_sources) \
+	Version_script-dec theoradec.exp
+libtheoradec_la_LDFLAGS = \
+  -version-info @THDEC_LIB_CURRENT@:@THDEC_LIB_REVISION@:@THDEC_LIB_AGE@ \
+  @THEORADEC_LDFLAGS@ @CAIRO_LIBS@
+
+libtheoraenc_la_SOURCES = \
+	$(encoder_sources) \
+	Version_script-enc theoraenc.exp
+libtheoraenc_la_LDFLAGS = \
+  -version-info @THENC_LIB_CURRENT@:@THENC_LIB_REVISION@:@THENC_LIB_AGE@ \
+  @THEORAENC_LDFLAGS@ $(OGG_LIBS)
+
+libtheora_la_SOURCES = \
+	$(decoder_sources) \
+	$(encoder_uniq_sources) \
+	Version_script theora.exp
+libtheora_la_LDFLAGS = \
+  -version-info @TH_LIB_CURRENT@:@TH_LIB_REVISION@:@TH_LIB_AGE@ \
+  @THEORA_LDFLAGS@ @CAIRO_LIBS@ $(OGG_LIBS)
+
+debug:
+	$(MAKE) all CFLAGS="@DEBUG@" 
+
+profile:
+	$(MAKE) all CFLAGS="@PROFILE@"
+
+# construct various symbol export list files
+.def.exp : defexp.awk
+	awk -f defexp.awk $< > $@

+ 845 - 0
Engine/lib/libtheora/lib/Makefile.in

@@ -0,0 +1,845 @@
+# Makefile.in generated by automake 1.6.3 from Makefile.am.
+# @configure_input@
+
+# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
+# Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+SHELL = @SHELL@
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+
+bindir = @bindir@
+sbindir = @sbindir@
+libexecdir = @libexecdir@
+datadir = @datadir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+localstatedir = @localstatedir@
+libdir = @libdir@
+infodir = @infodir@
+mandir = @mandir@
+includedir = @includedir@
+oldincludedir = /usr/include
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+top_builddir = ..
+
+ACLOCAL = @ACLOCAL@
+AUTOCONF = @AUTOCONF@
+AUTOMAKE = @AUTOMAKE@
+AUTOHEADER = @AUTOHEADER@
+
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_DATA = @INSTALL_DATA@
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = @program_transform_name@
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+host_alias = @host_alias@
+host_triplet = @host@
+
+EXEEXT = @EXEEXT@
+OBJEXT = @OBJEXT@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+ACLOCAL_AMFLAGS = @ACLOCAL_AMFLAGS@
+AMTAR = @AMTAR@
+AR = @AR@
+ARGZ_H = @ARGZ_H@
+AS = @AS@
+AWK = @AWK@
+BUILDABLE_EXAMPLES = @BUILDABLE_EXAMPLES@
+CAIRO_CFLAGS = @CAIRO_CFLAGS@
+CAIRO_LIBS = @CAIRO_LIBS@
+CC = @CC@
+CPP = @CPP@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+DEBUG = @DEBUG@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+F77 = @F77@
+GCJ = @GCJ@
+GCJFLAGS = @GCJFLAGS@
+GETOPT_OBJS = @GETOPT_OBJS@
+GREP = @GREP@
+HAVE_BIBTEX = @HAVE_BIBTEX@
+HAVE_DOXYGEN = @HAVE_DOXYGEN@
+HAVE_PDFLATEX = @HAVE_PDFLATEX@
+HAVE_PKG_CONFIG = @HAVE_PKG_CONFIG@
+HAVE_TRANSFIG = @HAVE_TRANSFIG@
+HAVE_VALGRIND = @HAVE_VALGRIND@
+INCLTDL = @INCLTDL@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LIBADD_DL = @LIBADD_DL@
+LIBADD_DLD_LINK = @LIBADD_DLD_LINK@
+LIBADD_DLOPEN = @LIBADD_DLOPEN@
+LIBADD_SHL_LOAD = @LIBADD_SHL_LOAD@
+LIBLTDL = @LIBLTDL@
+LIBM = @LIBM@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTDLDEPS = @LTDLDEPS@
+LTDLINCL = @LTDLINCL@
+LTDLOPEN = @LTDLOPEN@
+LT_CONFIG_H = @LT_CONFIG_H@
+LT_DLLOADERS = @LT_DLLOADERS@
+LT_DLPREOPEN = @LT_DLPREOPEN@
+MAINT = @MAINT@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OGG_CFLAGS = @OGG_CFLAGS@
+OGG_LIBS = @OGG_LIBS@
+OSS_LIBS = @OSS_LIBS@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PKG_CONFIG = @PKG_CONFIG@
+PNG_CFLAGS = @PNG_CFLAGS@
+PNG_LIBS = @PNG_LIBS@
+PROFILE = @PROFILE@
+RANLIB = @RANLIB@
+RC = @RC@
+SDL_CFLAGS = @SDL_CFLAGS@
+SDL_CONFIG = @SDL_CONFIG@
+SDL_LIBS = @SDL_LIBS@
+SED = @SED@
+STRIP = @STRIP@
+THDEC_LIB_AGE = @THDEC_LIB_AGE@
+THDEC_LIB_CURRENT = @THDEC_LIB_CURRENT@
+THDEC_LIB_REVISION = @THDEC_LIB_REVISION@
+THENC_LIB_AGE = @THENC_LIB_AGE@
+THENC_LIB_CURRENT = @THENC_LIB_CURRENT@
+THENC_LIB_REVISION = @THENC_LIB_REVISION@
+THEORADEC_LDFLAGS = @THEORADEC_LDFLAGS@
+THEORAENC_LDFLAGS = @THEORAENC_LDFLAGS@
+THEORA_LDFLAGS = @THEORA_LDFLAGS@
+TH_LIB_AGE = @TH_LIB_AGE@
+TH_LIB_CURRENT = @TH_LIB_CURRENT@
+TH_LIB_REVISION = @TH_LIB_REVISION@
+VALGRIND_ENVIRONMENT = @VALGRIND_ENVIRONMENT@
+VERSION = @VERSION@
+VORBISENC_LIBS = @VORBISENC_LIBS@
+VORBISFILE_LIBS = @VORBISFILE_LIBS@
+VORBIS_CFLAGS = @VORBIS_CFLAGS@
+VORBIS_LIBS = @VORBIS_LIBS@
+am__include = @am__include@
+am__quote = @am__quote@
+install_sh = @install_sh@
+lt_ECHO = @lt_ECHO@
+ltdl_LIBOBJS = @ltdl_LIBOBJS@
+ltdl_LTLIBOBJS = @ltdl_LTLIBOBJS@
+sys_symbol_underscore = @sys_symbol_underscore@
+INCLUDES = -I$(top_srcdir)/include
+AM_CFLAGS = $(OGG_CFLAGS) $(CAIRO_CFLAGS)
+
+EXTRA_DIST = \
+	cpu.c \
+	encoder_disabled.c \
+	x86/mmxencfrag.c \
+	x86/mmxfdct.c \
+	x86/sse2fdct.c \
+	x86/x86enc.c \
+	x86/x86enc.h \
+	x86/mmxfrag.c \
+	x86/mmxfrag.h \
+	x86/mmxidct.c \
+	x86/mmxloop.h \
+	x86/mmxstate.c \
+	x86/x86int.h \
+	x86/x86state.c \
+	x86_vc
+
+
+lib_LTLIBRARIES = libtheoradec.la libtheoraenc.la libtheora.la
+
+@THEORA_DISABLE_ENCODE_TRUE@encoder_uniq_sources = \
+@THEORA_DISABLE_ENCODE_TRUE@	encoder_disabled.c
+
+@THEORA_DISABLE_ENCODE_FALSE@encoder_uniq_sources = \
+@THEORA_DISABLE_ENCODE_FALSE@	analyze.c \
+@THEORA_DISABLE_ENCODE_FALSE@	fdct.c \
+@THEORA_DISABLE_ENCODE_FALSE@	encfrag.c \
+@THEORA_DISABLE_ENCODE_FALSE@	encapiwrapper.c \
+@THEORA_DISABLE_ENCODE_FALSE@	encinfo.c \
+@THEORA_DISABLE_ENCODE_FALSE@	encode.c \
+@THEORA_DISABLE_ENCODE_FALSE@	enquant.c \
+@THEORA_DISABLE_ENCODE_FALSE@	huffenc.c \
+@THEORA_DISABLE_ENCODE_FALSE@	mathops.c \
+@THEORA_DISABLE_ENCODE_FALSE@	mcenc.c \
+@THEORA_DISABLE_ENCODE_FALSE@	rate.c \
+@THEORA_DISABLE_ENCODE_FALSE@	tokenize.c \
+@THEORA_DISABLE_ENCODE_FALSE@	$(encoder_uniq_arch_sources)
+
+
+@THEORA_DISABLE_ENCODE_TRUE@encoder_sources = \
+@THEORA_DISABLE_ENCODE_TRUE@	$(encoder_uniq_sources)
+
+@THEORA_DISABLE_ENCODE_FALSE@encoder_sources = \
+@THEORA_DISABLE_ENCODE_FALSE@	apiwrapper.c \
+@THEORA_DISABLE_ENCODE_FALSE@	fragment.c \
+@THEORA_DISABLE_ENCODE_FALSE@	idct.c \
+@THEORA_DISABLE_ENCODE_FALSE@	internal.c \
+@THEORA_DISABLE_ENCODE_FALSE@	state.c \
+@THEORA_DISABLE_ENCODE_FALSE@	quant.c \
+@THEORA_DISABLE_ENCODE_FALSE@	$(encoder_shared_arch_sources) \
+@THEORA_DISABLE_ENCODE_FALSE@	$(encoder_uniq_sources)
+
+@THEORA_DISABLE_ENCODE_FALSE@encoder_uniq_x86_sources = \
+@THEORA_DISABLE_ENCODE_FALSE@	x86/mmxencfrag.c \
+@THEORA_DISABLE_ENCODE_FALSE@	x86/mmxfdct.c \
+@THEORA_DISABLE_ENCODE_FALSE@	x86/x86enc.c
+
+
+@THEORA_DISABLE_ENCODE_FALSE@encoder_uniq_x86_64_sources = \
+@THEORA_DISABLE_ENCODE_FALSE@	x86/sse2fdct.c
+
+
+@THEORA_DISABLE_ENCODE_FALSE@encoder_shared_x86_sources = \
+@THEORA_DISABLE_ENCODE_FALSE@	x86/mmxfrag.c \
+@THEORA_DISABLE_ENCODE_FALSE@	x86/mmxidct.c \
+@THEORA_DISABLE_ENCODE_FALSE@	x86/mmxstate.c \
+@THEORA_DISABLE_ENCODE_FALSE@	x86/x86state.c
+
+
+@THEORA_DISABLE_ENCODE_FALSE@encoder_shared_x86_64_sources = 
+
+@CPU_x86_32_FALSE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@encoder_uniq_arch_sources = 
+@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@encoder_uniq_arch_sources = $(encoder_uniq_x86_sources)
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@encoder_uniq_arch_sources = \
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@ $(encoder_uniq_x86_sources) \
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@ $(encoder_uniq_x86_64_sources)
+
+@CPU_x86_32_FALSE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@encoder_shared_arch_sources = 
+@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@encoder_shared_arch_sources = $(encoder_shared_x86_sources)
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@encoder_shared_arch_sources = \
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@ $(encoder_shared_x86_sources) \
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@ $(encoder_shared_x86_64_sources)
+
+
+decoder_x86_sources = \
+	x86/mmxidct.c \
+	x86/mmxfrag.c \
+	x86/mmxstate.c \
+	x86/x86state.c
+
+@CPU_x86_32_FALSE@@CPU_x86_64_FALSE@decoder_arch_sources = 
+@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@decoder_arch_sources = $(decoder_x86_sources)
+@CPU_x86_64_TRUE@decoder_arch_sources = $(decoder_x86_sources)
+
+decoder_sources = \
+	apiwrapper.c \
+	bitpack.c \
+	decapiwrapper.c \
+	decinfo.c \
+	decode.c \
+	dequant.c \
+	fragment.c \
+	huffdec.c \
+	idct.c \
+	info.c \
+	internal.c \
+	quant.c \
+	state.c \
+	$(decoder_arch_sources)
+
+
+noinst_HEADERS = \
+	cpu.h \
+	internal.h \
+	encint.h \
+	enquant.h \
+	huffenc.h \
+	mathops.h \
+	modedec.h \
+	x86/x86enc.h \
+	apiwrapper.h \
+	bitpack.h \
+	dct.h \
+	decint.h \
+	dequant.h \
+	huffdec.h \
+	huffman.h \
+	ocintrin.h \
+	quant.h \
+	x86/mmxfrag.h \
+	x86/mmxloop.h \
+	x86/x86int.h
+
+
+libtheoradec_la_SOURCES = \
+	$(decoder_sources) \
+	Version_script-dec theoradec.exp
+
+libtheoradec_la_LDFLAGS = \
+  -version-info @THDEC_LIB_CURRENT@:@THDEC_LIB_REVISION@:@THDEC_LIB_AGE@ \
+  @THEORADEC_LDFLAGS@ @CAIRO_LIBS@
+
+
+libtheoraenc_la_SOURCES = \
+	$(encoder_sources) \
+	Version_script-enc theoraenc.exp
+
+libtheoraenc_la_LDFLAGS = \
+  -version-info @THENC_LIB_CURRENT@:@THENC_LIB_REVISION@:@THENC_LIB_AGE@ \
+  @THEORAENC_LDFLAGS@ $(OGG_LIBS)
+
+
+libtheora_la_SOURCES = \
+	$(decoder_sources) \
+	$(encoder_uniq_sources) \
+	Version_script theora.exp
+
+libtheora_la_LDFLAGS = \
+  -version-info @TH_LIB_CURRENT@:@TH_LIB_REVISION@:@TH_LIB_AGE@ \
+  @THEORA_LDFLAGS@ @CAIRO_LIBS@ $(OGG_LIBS)
+
+subdir = lib
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+LTLIBRARIES = $(lib_LTLIBRARIES)
+
+libtheora_la_LIBADD =
+am__objects_1 = mmxidct.lo mmxfrag.lo mmxstate.lo x86state.lo
+@CPU_x86_32_FALSE@@CPU_x86_64_FALSE@am__objects_2 =
+@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@am__objects_2 = $(am__objects_1)
+@CPU_x86_64_TRUE@am__objects_2 = $(am__objects_1)
+am__objects_3 = apiwrapper.lo bitpack.lo decapiwrapper.lo decinfo.lo \
+	decode.lo dequant.lo fragment.lo huffdec.lo idct.lo info.lo \
+	internal.lo quant.lo state.lo $(am__objects_2)
+@THEORA_DISABLE_ENCODE_FALSE@am__objects_4 = mmxencfrag.lo mmxfdct.lo \
+@THEORA_DISABLE_ENCODE_FALSE@	x86enc.lo
+@THEORA_DISABLE_ENCODE_FALSE@am__objects_5 = sse2fdct.lo
+@CPU_x86_32_FALSE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@am__objects_6 =
+@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@am__objects_6 = \
+@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@	$(am__objects_4)
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@am__objects_6 = \
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@	$(am__objects_4) \
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@	$(am__objects_5)
+@THEORA_DISABLE_ENCODE_TRUE@am__objects_7 = encoder_disabled.lo
+@THEORA_DISABLE_ENCODE_FALSE@am__objects_7 = analyze.lo fdct.lo \
+@THEORA_DISABLE_ENCODE_FALSE@	encfrag.lo encapiwrapper.lo \
+@THEORA_DISABLE_ENCODE_FALSE@	encinfo.lo encode.lo enquant.lo \
+@THEORA_DISABLE_ENCODE_FALSE@	huffenc.lo mathops.lo mcenc.lo \
+@THEORA_DISABLE_ENCODE_FALSE@	rate.lo tokenize.lo \
+@THEORA_DISABLE_ENCODE_FALSE@	$(am__objects_6)
+am_libtheora_la_OBJECTS = $(am__objects_3) $(am__objects_7)
+libtheora_la_OBJECTS = $(am_libtheora_la_OBJECTS)
+libtheoradec_la_LIBADD =
+am_libtheoradec_la_OBJECTS = $(am__objects_3)
+libtheoradec_la_OBJECTS = $(am_libtheoradec_la_OBJECTS)
+libtheoraenc_la_LIBADD =
+@THEORA_DISABLE_ENCODE_FALSE@am__objects_8 = mmxfrag.lo mmxidct.lo \
+@THEORA_DISABLE_ENCODE_FALSE@	mmxstate.lo x86state.lo
+@THEORA_DISABLE_ENCODE_FALSE@am__objects_9 =
+@CPU_x86_32_FALSE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@am__objects_10 =
+@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@am__objects_10 = \
+@CPU_x86_32_TRUE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@	$(am__objects_8)
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@am__objects_10 = \
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@	$(am__objects_8) \
+@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@	$(am__objects_9)
+@THEORA_DISABLE_ENCODE_TRUE@am__objects_11 = $(am__objects_7)
+@THEORA_DISABLE_ENCODE_FALSE@am__objects_11 = apiwrapper.lo fragment.lo \
+@THEORA_DISABLE_ENCODE_FALSE@	idct.lo internal.lo state.lo \
+@THEORA_DISABLE_ENCODE_FALSE@	quant.lo $(am__objects_10) \
+@THEORA_DISABLE_ENCODE_FALSE@	$(am__objects_7)
+am_libtheoraenc_la_OBJECTS = $(am__objects_11)
+libtheoraenc_la_OBJECTS = $(am_libtheoraenc_la_OBJECTS)
+
+DEFS = @DEFS@
+DEFAULT_INCLUDES =  -I. -I$(srcdir) -I$(top_builddir)
+CPPFLAGS = @CPPFLAGS@
+LDFLAGS = @LDFLAGS@
+LIBS = @LIBS@
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+@AMDEP_TRUE@DEP_FILES = ./$(DEPDIR)/analyze.Plo \
+@AMDEP_TRUE@	./$(DEPDIR)/apiwrapper.Plo ./$(DEPDIR)/bitpack.Plo \
+@AMDEP_TRUE@	./$(DEPDIR)/decapiwrapper.Plo \
+@AMDEP_TRUE@	./$(DEPDIR)/decinfo.Plo ./$(DEPDIR)/decode.Plo \
+@AMDEP_TRUE@	./$(DEPDIR)/dequant.Plo \
+@AMDEP_TRUE@	./$(DEPDIR)/encapiwrapper.Plo \
+@AMDEP_TRUE@	./$(DEPDIR)/encfrag.Plo ./$(DEPDIR)/encinfo.Plo \
+@AMDEP_TRUE@	./$(DEPDIR)/encode.Plo \
+@AMDEP_TRUE@	./$(DEPDIR)/encoder_disabled.Plo \
+@AMDEP_TRUE@	./$(DEPDIR)/enquant.Plo ./$(DEPDIR)/fdct.Plo \
+@AMDEP_TRUE@	./$(DEPDIR)/fragment.Plo ./$(DEPDIR)/huffdec.Plo \
+@AMDEP_TRUE@	./$(DEPDIR)/huffenc.Plo ./$(DEPDIR)/idct.Plo \
+@AMDEP_TRUE@	./$(DEPDIR)/info.Plo ./$(DEPDIR)/internal.Plo \
+@AMDEP_TRUE@	./$(DEPDIR)/mathops.Plo ./$(DEPDIR)/mcenc.Plo \
+@AMDEP_TRUE@	./$(DEPDIR)/mmxencfrag.Plo ./$(DEPDIR)/mmxfdct.Plo \
+@AMDEP_TRUE@	./$(DEPDIR)/mmxfrag.Plo ./$(DEPDIR)/mmxidct.Plo \
+@AMDEP_TRUE@	./$(DEPDIR)/mmxstate.Plo ./$(DEPDIR)/quant.Plo \
+@AMDEP_TRUE@	./$(DEPDIR)/rate.Plo ./$(DEPDIR)/sse2fdct.Plo \
+@AMDEP_TRUE@	./$(DEPDIR)/state.Plo ./$(DEPDIR)/tokenize.Plo \
+@AMDEP_TRUE@	./$(DEPDIR)/x86enc.Plo ./$(DEPDIR)/x86state.Plo
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) \
+	$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(AM_LDFLAGS) $(LDFLAGS) -o $@
+CFLAGS = @CFLAGS@
+DIST_SOURCES = $(libtheora_la_SOURCES) $(libtheoradec_la_SOURCES) \
+	$(libtheoraenc_la_SOURCES)
+HEADERS = $(noinst_HEADERS)
+
+DIST_COMMON = $(noinst_HEADERS) Makefile.am Makefile.in
+SOURCES = $(libtheora_la_SOURCES) $(libtheoradec_la_SOURCES) $(libtheoraenc_la_SOURCES)
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .def .exp .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am  $(top_srcdir)/configure.ac $(ACLOCAL_M4)
+	cd $(top_srcdir) && \
+	  $(AUTOMAKE) --gnu  lib/Makefile
+Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in  $(top_builddir)/config.status
+	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)
+libLTLIBRARIES_INSTALL = $(INSTALL)
+# Install every libtool library listed in lib_LTLIBRARIES into
+# $(DESTDIR)$(libdir).  The destination directory is created first.  Each
+# entry that exists as a regular file is installed under its base name
+# (sed strips any leading directory part) with "libtool --mode=install";
+# the command is echoed before it is run.  Entries that do not exist as
+# files are silently skipped.
+install-libLTLIBRARIES: $(lib_LTLIBRARIES)
+	@$(NORMAL_INSTALL)
+	$(mkinstalldirs) $(DESTDIR)$(libdir)
+	@list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+	  if test -f $$p; then \
+	    f="`echo $$p | sed -e 's|^.*/||'`"; \
+	    echo " $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libdir)/$$f"; \
+	    $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libdir)/$$f; \
+	  else :; fi; \
+	done
+
+# Remove every library listed in lib_LTLIBRARIES from $(DESTDIR)$(libdir)
+# with "libtool --mode=uninstall rm -f".  Only the base name of each entry
+# is used (sed strips any leading directory part), and each command is
+# echoed before it is run.
+uninstall-libLTLIBRARIES:
+	@$(NORMAL_UNINSTALL)
+	@list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+	    p="`echo $$p | sed -e 's|^.*/||'`"; \
+	  echo " $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p"; \
+	  $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p; \
+	done
+
+# Delete the built libtool libraries and, for each one, any so_locations
+# file in the directory that contains it.
+# The directory is the library path with its last component removed.  When
+# the path has no directory part, sed leaves it unchanged, so that case is
+# detected by comparing the result against the original path and mapped
+# to ".".
+# Shell variables must be written with "$$" so that make passes a literal
+# "$" to the shell instead of expanding a (here empty) make variable.
+clean-libLTLIBRARIES:
+	-test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
+	@list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+	  dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+	  test "$$dir" != "$$p" || dir=.; \
+	  echo "rm -f \"$${dir}/so_locations\""; \
+	  rm -f "$${dir}/so_locations"; \
+	done
+mmxidct.lo: x86/mmxidct.c
+mmxfrag.lo: x86/mmxfrag.c
+mmxstate.lo: x86/mmxstate.c
+x86state.lo: x86/x86state.c
+mmxencfrag.lo: x86/mmxencfrag.c
+mmxfdct.lo: x86/mmxfdct.c
+x86enc.lo: x86/x86enc.c
+sse2fdct.lo: x86/sse2fdct.c
+libtheora.la: $(libtheora_la_OBJECTS) $(libtheora_la_DEPENDENCIES) 
+	$(LINK) -rpath $(libdir) $(libtheora_la_LDFLAGS) $(libtheora_la_OBJECTS) $(libtheora_la_LIBADD) $(LIBS)
+libtheoradec.la: $(libtheoradec_la_OBJECTS) $(libtheoradec_la_DEPENDENCIES) 
+	$(LINK) -rpath $(libdir) $(libtheoradec_la_LDFLAGS) $(libtheoradec_la_OBJECTS) $(libtheoradec_la_LIBADD) $(LIBS)
+libtheoraenc.la: $(libtheoraenc_la_OBJECTS) $(libtheoraenc_la_DEPENDENCIES) 
+	$(LINK) -rpath $(libdir) $(libtheoraenc_la_LDFLAGS) $(libtheoraenc_la_OBJECTS) $(libtheoraenc_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT) core *.core
+
+distclean-compile:
+	-rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/analyze.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/apiwrapper.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bitpack.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/decapiwrapper.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/decinfo.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/decode.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dequant.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/encapiwrapper.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/encfrag.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/encinfo.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/encode.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/encoder_disabled.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/enquant.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fdct.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fragment.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/huffdec.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/huffenc.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/idct.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/info.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/internal.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mathops.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mcenc.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mmxencfrag.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mmxfdct.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mmxfrag.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mmxidct.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mmxstate.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/quant.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rate.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sse2fdct.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/state.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tokenize.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/x86enc.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/x86state.Plo@am__quote@
+
+distclean-depend:
+	-rm -rf ./$(DEPDIR)
+
+.c.o:
+@AMDEP_TRUE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(COMPILE) -c `test -f '$<' || echo '$(srcdir)/'`$<
+
+.c.obj:
+@AMDEP_TRUE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(COMPILE) -c `cygpath -w $<`
+
+.c.lo:
+@AMDEP_TRUE@	source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/$*.Plo' tmpdepfile='$(DEPDIR)/$*.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(LTCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$<
+
+mmxidct.o: x86/mmxidct.c
+@AMDEP_TRUE@	source='x86/mmxidct.c' object='mmxidct.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/mmxidct.Po' tmpdepfile='$(DEPDIR)/mmxidct.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxidct.o `test -f 'x86/mmxidct.c' || echo '$(srcdir)/'`x86/mmxidct.c
+
+mmxidct.obj: x86/mmxidct.c
+@AMDEP_TRUE@	source='x86/mmxidct.c' object='mmxidct.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/mmxidct.Po' tmpdepfile='$(DEPDIR)/mmxidct.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxidct.obj `cygpath -w x86/mmxidct.c`
+
+mmxidct.lo: x86/mmxidct.c
+@AMDEP_TRUE@	source='x86/mmxidct.c' object='mmxidct.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/mmxidct.Plo' tmpdepfile='$(DEPDIR)/mmxidct.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxidct.lo `test -f 'x86/mmxidct.c' || echo '$(srcdir)/'`x86/mmxidct.c
+
+mmxfrag.o: x86/mmxfrag.c
+@AMDEP_TRUE@	source='x86/mmxfrag.c' object='mmxfrag.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/mmxfrag.Po' tmpdepfile='$(DEPDIR)/mmxfrag.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxfrag.o `test -f 'x86/mmxfrag.c' || echo '$(srcdir)/'`x86/mmxfrag.c
+
+mmxfrag.obj: x86/mmxfrag.c
+@AMDEP_TRUE@	source='x86/mmxfrag.c' object='mmxfrag.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/mmxfrag.Po' tmpdepfile='$(DEPDIR)/mmxfrag.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxfrag.obj `cygpath -w x86/mmxfrag.c`
+
+mmxfrag.lo: x86/mmxfrag.c
+@AMDEP_TRUE@	source='x86/mmxfrag.c' object='mmxfrag.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/mmxfrag.Plo' tmpdepfile='$(DEPDIR)/mmxfrag.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxfrag.lo `test -f 'x86/mmxfrag.c' || echo '$(srcdir)/'`x86/mmxfrag.c
+
+mmxstate.o: x86/mmxstate.c
+@AMDEP_TRUE@	source='x86/mmxstate.c' object='mmxstate.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/mmxstate.Po' tmpdepfile='$(DEPDIR)/mmxstate.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxstate.o `test -f 'x86/mmxstate.c' || echo '$(srcdir)/'`x86/mmxstate.c
+
+mmxstate.obj: x86/mmxstate.c
+@AMDEP_TRUE@	source='x86/mmxstate.c' object='mmxstate.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/mmxstate.Po' tmpdepfile='$(DEPDIR)/mmxstate.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxstate.obj `cygpath -w x86/mmxstate.c`
+
+mmxstate.lo: x86/mmxstate.c
+@AMDEP_TRUE@	source='x86/mmxstate.c' object='mmxstate.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/mmxstate.Plo' tmpdepfile='$(DEPDIR)/mmxstate.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxstate.lo `test -f 'x86/mmxstate.c' || echo '$(srcdir)/'`x86/mmxstate.c
+
+x86state.o: x86/x86state.c
+@AMDEP_TRUE@	source='x86/x86state.c' object='x86state.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/x86state.Po' tmpdepfile='$(DEPDIR)/x86state.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x86state.o `test -f 'x86/x86state.c' || echo '$(srcdir)/'`x86/x86state.c
+
+x86state.obj: x86/x86state.c
+@AMDEP_TRUE@	source='x86/x86state.c' object='x86state.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/x86state.Po' tmpdepfile='$(DEPDIR)/x86state.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x86state.obj `cygpath -w x86/x86state.c`
+
+x86state.lo: x86/x86state.c
+@AMDEP_TRUE@	source='x86/x86state.c' object='x86state.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/x86state.Plo' tmpdepfile='$(DEPDIR)/x86state.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x86state.lo `test -f 'x86/x86state.c' || echo '$(srcdir)/'`x86/x86state.c
+
+mmxencfrag.o: x86/mmxencfrag.c
+@AMDEP_TRUE@	source='x86/mmxencfrag.c' object='mmxencfrag.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/mmxencfrag.Po' tmpdepfile='$(DEPDIR)/mmxencfrag.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxencfrag.o `test -f 'x86/mmxencfrag.c' || echo '$(srcdir)/'`x86/mmxencfrag.c
+
+mmxencfrag.obj: x86/mmxencfrag.c
+@AMDEP_TRUE@	source='x86/mmxencfrag.c' object='mmxencfrag.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/mmxencfrag.Po' tmpdepfile='$(DEPDIR)/mmxencfrag.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxencfrag.obj `cygpath -w x86/mmxencfrag.c`
+
+mmxencfrag.lo: x86/mmxencfrag.c
+@AMDEP_TRUE@	source='x86/mmxencfrag.c' object='mmxencfrag.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/mmxencfrag.Plo' tmpdepfile='$(DEPDIR)/mmxencfrag.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxencfrag.lo `test -f 'x86/mmxencfrag.c' || echo '$(srcdir)/'`x86/mmxencfrag.c
+
+mmxfdct.o: x86/mmxfdct.c
+@AMDEP_TRUE@	source='x86/mmxfdct.c' object='mmxfdct.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/mmxfdct.Po' tmpdepfile='$(DEPDIR)/mmxfdct.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxfdct.o `test -f 'x86/mmxfdct.c' || echo '$(srcdir)/'`x86/mmxfdct.c
+
+mmxfdct.obj: x86/mmxfdct.c
+@AMDEP_TRUE@	source='x86/mmxfdct.c' object='mmxfdct.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/mmxfdct.Po' tmpdepfile='$(DEPDIR)/mmxfdct.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxfdct.obj `cygpath -w x86/mmxfdct.c`
+
+mmxfdct.lo: x86/mmxfdct.c
+@AMDEP_TRUE@	source='x86/mmxfdct.c' object='mmxfdct.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/mmxfdct.Plo' tmpdepfile='$(DEPDIR)/mmxfdct.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mmxfdct.lo `test -f 'x86/mmxfdct.c' || echo '$(srcdir)/'`x86/mmxfdct.c
+
+x86enc.o: x86/x86enc.c
+@AMDEP_TRUE@	source='x86/x86enc.c' object='x86enc.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/x86enc.Po' tmpdepfile='$(DEPDIR)/x86enc.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x86enc.o `test -f 'x86/x86enc.c' || echo '$(srcdir)/'`x86/x86enc.c
+
+x86enc.obj: x86/x86enc.c
+@AMDEP_TRUE@	source='x86/x86enc.c' object='x86enc.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/x86enc.Po' tmpdepfile='$(DEPDIR)/x86enc.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x86enc.obj `cygpath -w x86/x86enc.c`
+
+x86enc.lo: x86/x86enc.c
+@AMDEP_TRUE@	source='x86/x86enc.c' object='x86enc.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/x86enc.Plo' tmpdepfile='$(DEPDIR)/x86enc.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o x86enc.lo `test -f 'x86/x86enc.c' || echo '$(srcdir)/'`x86/x86enc.c
+
+sse2fdct.o: x86/sse2fdct.c
+@AMDEP_TRUE@	source='x86/sse2fdct.c' object='sse2fdct.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/sse2fdct.Po' tmpdepfile='$(DEPDIR)/sse2fdct.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sse2fdct.o `test -f 'x86/sse2fdct.c' || echo '$(srcdir)/'`x86/sse2fdct.c
+
+sse2fdct.obj: x86/sse2fdct.c
+@AMDEP_TRUE@	source='x86/sse2fdct.c' object='sse2fdct.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/sse2fdct.Po' tmpdepfile='$(DEPDIR)/sse2fdct.TPo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sse2fdct.obj `cygpath -w x86/sse2fdct.c`
+
+sse2fdct.lo: x86/sse2fdct.c
+@AMDEP_TRUE@	source='x86/sse2fdct.c' object='sse2fdct.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@	depfile='$(DEPDIR)/sse2fdct.Plo' tmpdepfile='$(DEPDIR)/sse2fdct.TPlo' @AMDEPBACKSLASH@
+@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+	$(LIBTOOL) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sse2fdct.lo `test -f 'x86/sse2fdct.c' || echo '$(srcdir)/'`x86/sse2fdct.c
+CCDEPMODE = @CCDEPMODE@
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+
+distclean-libtool:
+	-rm -f libtool
+uninstall-info-am:
+
+ETAGS = etags
+ETAGSFLAGS =
+
+tags: TAGS
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '    { files[$$0] = 1; } \
+	       END { for (i in files) print i; }'`; \
+	mkid -fID $$unique
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+		$(TAGS_FILES) $(LISP)
+	tags=; \
+	here=`pwd`; \
+	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+	unique=`for i in $$list; do \
+	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+	  done | \
+	  $(AWK) '    { files[$$0] = 1; } \
+	       END { for (i in files) print i; }'`; \
+	test -z "$(ETAGS_ARGS)$$tags$$unique" \
+	  || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	     $$tags $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && cd $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+
+top_distdir = ..
+distdir = $(top_distdir)/$(PACKAGE)-$(VERSION)
+
+# Populate $(distdir) with every entry of DISTFILES.
+# The x86 subdirectory of $(distdir) is created up front.  For each entry:
+#   - it is taken from the current directory when it exists there as a file
+#     or directory, otherwise from $(srcdir);
+#   - if the entry has a directory part other than ".", that subdirectory
+#     is created under $(distdir);
+#   - a directory is copied recursively (the $(srcdir) copy first, then the
+#     current-directory copy on top of it when the two differ);
+#   - a regular file is copied only if it is not already present in
+#     $(distdir).
+# Any failed copy aborts the recipe with exit status 1.
+distdir: $(DISTFILES)
+	$(mkinstalldirs) $(distdir)/x86
+	@list='$(DISTFILES)'; for file in $$list; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
+	  if test "$$dir" != "$$file" && test "$$dir" != "."; then \
+	    dir="/$$dir"; \
+	    $(mkinstalldirs) "$(distdir)$$dir"; \
+	  else \
+	    dir=''; \
+	  fi; \
+	  if test -d $$d/$$file; then \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+	    fi; \
+	    cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+	  else \
+	    test -f $(distdir)/$$file \
+	    || cp -p $$d/$$file $(distdir)/$$file \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES) $(HEADERS)
+
+installdirs:
+	$(mkinstalldirs) $(DESTDIR)$(libdir)
+
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+# Run "make install" with INSTALL_PROGRAM replaced by
+# $(INSTALL_STRIP_PROGRAM) and INSTALL_STRIP_FLAG set to -s.  When STRIP is
+# non-empty it is additionally passed on as STRIPPROG through
+# INSTALL_PROGRAM_ENV.
+install-strip:
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	  INSTALL_STRIP_FLAG=-s \
+	  `test -z '$(STRIP)' || \
+	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-rm -f Makefile $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \
+	mostlyclean-am
+
+distclean: distclean-am
+
+distclean-am: clean-am distclean-compile distclean-depend \
+	distclean-generic distclean-libtool distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-exec-am: install-libLTLIBRARIES
+
+install-info: install-info-am
+
+install-man:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+uninstall-am: uninstall-info-am uninstall-libLTLIBRARIES
+
+.PHONY: GTAGS all all-am check check-am clean clean-generic \
+	clean-libLTLIBRARIES clean-libtool distclean distclean-compile \
+	distclean-depend distclean-generic distclean-libtool \
+	distclean-tags distdir dvi dvi-am info info-am install \
+	install-am install-data install-data-am install-exec \
+	install-exec-am install-info install-info-am \
+	install-libLTLIBRARIES install-man install-strip installcheck \
+	installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic mostlyclean-libtool tags uninstall \
+	uninstall-am uninstall-info-am uninstall-libLTLIBRARIES
+
+
+# Convenience target: build "all" with CFLAGS overridden by the flags that
+# configure substitutes for @DEBUG@.
+debug:
+	$(MAKE) all CFLAGS="@DEBUG@" 
+
+# Convenience target: build "all" with CFLAGS overridden by the flags that
+# configure substitutes for @PROFILE@.
+profile:
+	$(MAKE) all CFLAGS="@PROFILE@"
+
+# Construct the various symbol export list files: each .exp file is
+# produced from the .def file of the same name by running the defexp.awk
+# script over it and redirecting its output to the target.
+.def.exp : defexp.awk
+	awk -f defexp.awk $< > $@
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:

+ 53 - 0
Engine/lib/libtheora/lib/Version_script

@@ -0,0 +1,53 @@
+#
+# Export file for libtheora
+#
+# Only the symbols listed in the global section will be callable from
+# applications linking to the libraries.
+#
+
+# We use something that looks like a versioned so filename here 
+# to define the old API because of a historical confusion. This
+# label must be kept to maintain ABI compatibility.
+
+libtheora.so.1.0
+{
+	global:
+		theora_version_string;
+		theora_version_number;
+
+		theora_encode_init;
+		theora_encode_YUVin;
+		theora_encode_packetout;
+		theora_encode_header;
+		theora_encode_comment;
+		theora_encode_tables;
+
+		theora_decode_header;
+		theora_decode_init;
+		theora_decode_packetin;
+		theora_decode_YUVout;
+
+		theora_control;
+
+		theora_packet_isheader;
+		theora_packet_iskeyframe;
+
+		theora_granule_shift;
+		theora_granule_frame;
+		theora_granule_time;
+
+		theora_info_init;
+		theora_info_clear;
+
+		theora_clear;
+
+		theora_comment_init;
+		theora_comment_add;
+		theora_comment_add_tag;
+		theora_comment_query;
+		theora_comment_query_count;
+		theora_comment_clear;
+
+	local:
+		*;
+};

+ 82 - 0
Engine/lib/libtheora/lib/Version_script-dec

@@ -0,0 +1,82 @@
+#
+# Export file for libtheoradec
+#
+# Only the symbols listed in the global section will be callable from
+# applications linking to the libraries.
+#
+
+# The 1.x API
+libtheoradec_1.0
+{
+	global:
+		th_version_string;
+		th_version_number;
+
+		th_decode_headerin;
+		th_decode_alloc;
+		th_setup_free;
+		th_decode_ctl;
+		th_decode_packetin;
+		th_decode_ycbcr_out;
+		th_decode_free;
+
+		th_packet_isheader;
+		th_packet_iskeyframe;
+
+		th_granule_frame;
+		th_granule_time;
+
+		th_info_init;
+		th_info_clear;
+
+		th_comment_init;
+		th_comment_add;
+		th_comment_add_tag;
+		th_comment_query;
+		th_comment_query_count;
+		th_comment_clear;
+
+	local:
+		*;
+};
+
+# The deprecated legacy api from the libtheora alpha releases.
+# We use something that looks like a versioned so filename here 
+# to define the old API because of a historical confusion. This
+# label must be kept to maintain ABI compatibility.
+
+libtheora.so.1.0
+{
+	global:
+		theora_version_string;
+		theora_version_number;
+
+		theora_decode_header;
+		theora_decode_init;
+		theora_decode_packetin;
+		theora_decode_YUVout;
+
+		theora_control;
+
+		theora_packet_isheader;
+		theora_packet_iskeyframe;
+
+		theora_granule_shift;
+		theora_granule_frame;
+		theora_granule_time;
+
+		theora_info_init;
+		theora_info_clear;
+
+		theora_clear;
+
+		theora_comment_init;
+		theora_comment_add;
+		theora_comment_add_tag;
+		theora_comment_query;
+		theora_comment_query_count;
+		theora_comment_clear;
+
+	local:
+		*;
+};

+ 43 - 0
Engine/lib/libtheora/lib/Version_script-enc

@@ -0,0 +1,43 @@
+#
+# Export file for libtheoraenc
+#
+# Only the symbols listed in the global section will be callable from
+# applications linking to the libraries.
+#
+
+# The 1.x encoder API
+libtheoraenc_1.0
+{
+	global:
+		th_encode_alloc;
+		th_encode_ctl;
+		th_encode_flushheader;
+		th_encode_ycbcr_in;
+		th_encode_packetout;
+		th_encode_free;
+
+		TH_VP31_QUANT_INFO;
+		TH_VP31_HUFF_CODES;
+
+	local:
+		*;
+};
+
+# The encoder portion of the deprecated alpha release api.
+# We use something that looks like a versioned so filename here 
+# to define the old API because of a historical confusion. This
+# label must be kept to maintain ABI compatibility.
+
+libtheora.so.1.0
+{
+	global:
+		theora_encode_init;
+		theora_encode_YUVin;
+		theora_encode_packetout;
+		theora_encode_header;
+		theora_encode_comment;
+		theora_encode_tables;
+
+	local:
+		*;
+};

+ 2709 - 0
Engine/lib/libtheora/lib/analyze.c

@@ -0,0 +1,2709 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function: mode selection code
+  last mod: $Id$
+
+ ********************************************************************/
+#include <limits.h>
+#include <string.h>
+#include "encint.h"
+#include "modedec.h"
+
+
+
+typedef struct oc_fr_state           oc_fr_state;           /*Block flag costs.*/
+typedef struct oc_qii_state          oc_qii_state;          /*qii run costs.*/
+typedef struct oc_enc_pipeline_state oc_enc_pipeline_state; /*Analysis state.*/
+typedef struct oc_rd_metric          oc_rd_metric;          /*Per-MB costs.*/
+typedef struct oc_mode_choice        oc_mode_choice;
+
+
+
+/*There are 8 possible schemes used to encode macro block modes.
+  Schemes 0-6 use a maximally-skewed Huffman code to code each of the modes.
+  The same set of Huffman codes is used for each of these 7 schemes, but the
+   mode assigned to each codeword varies.
+  Scheme 0 writes a custom mapping from codeword to MB mode to the bitstream,
+   while schemes 1-6 have a fixed mapping.
+  Scheme 7 just encodes each mode directly in 3 bits.*/
+
+/*The mode orderings for the various mode coding schemes.
+  Scheme 0 uses a custom alphabet, which is not stored in this table, so row
+   si-1 holds the ranks used by scheme si (1...7).
+  Each row is indexed by macro block mode and yields that mode's rank within
+   the scheme.
+  The comment above each row appears to spell out the scheme's alphabet in
+   rank order using one abbreviation per mode (NOTE(review): confirm the
+   abbreviations against the mode enumeration).
+  This is the inverse of the equivalent table OC_MODE_ALPHABETS in the
+   decoder.*/
+static const unsigned char OC_MODE_RANKS[7][OC_NMODES]={
+  /*Last MV dominates.*/
+  /*L P M N I G GM 4*/
+  {3,4,2,0,1,5,6,7},
+  /*L P N M I G GM 4*/
+  {2,4,3,0,1,5,6,7},
+  /*L M P N I G GM 4*/
+  {3,4,1,0,2,5,6,7},
+  /*L M N P I G GM 4*/
+  {2,4,1,0,3,5,6,7},
+  /*No MV dominates.*/
+  /*N L P M I G GM 4*/
+  {0,4,3,1,2,5,6,7},
+  /*N G L P M I GM 4*/
+  {0,5,4,2,3,1,6,7},
+  /*Default ordering.*/
+  /*N I M L P G GM 4*/
+  {0,1,2,3,4,5,6,7}
+};
+
+
+
+/*Set up the per-scheme rank table pointers of a mode scheme chooser.
+  Scheme 0 points at the chooser's own rank table, which is updated as modes
+   are chosen; schemes 1...7 use the fixed rows of OC_MODE_RANKS.
+  This need only be called once per encoder.*/
+void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser){
+  int scheme;
+  _chooser->mode_ranks[0]=_chooser->scheme0_ranks;
+  scheme=1;
+  while(scheme<8){
+    _chooser->mode_ranks[scheme]=OC_MODE_RANKS[scheme-1];
+    scheme++;
+  }
+}
+
+/*Return the mode scheme chooser to its start-of-frame state: no modes
+   counted, no bits charged to schemes 1...7, and the default orderings for
+   both the scheme list and the scheme 0 mode list.
+  This needs to be called once for each frame, including the first.*/
+static void oc_mode_scheme_chooser_reset(oc_mode_scheme_chooser *_chooser){
+  int i;
+  memset(_chooser->mode_counts,0,OC_NMODES*sizeof(_chooser->mode_counts[0]));
+  /*Scheme 0 starts with 24 bits to store the mode list in; the other seven
+     schemes start with none.*/
+  memset(&_chooser->scheme_bits[1],0,7*sizeof(_chooser->scheme_bits[0]));
+  _chooser->scheme_bits[0]=24;
+  /*Scheme 7 should always start first, and scheme 0 should always start
+     last.*/
+  for(i=0;i<8;i++)_chooser->scheme_list[i]=7-i;
+  /*Scheme 0 begins with the identity mapping between modes and ranks.*/
+  for(i=0;i<8;i++){
+    _chooser->scheme0_ranks[i]=i;
+    _chooser->scheme0_list[i]=i;
+  }
+}
+
+
+/*Estimate the cost of coding one more macro block mode.
+  This is the real purpose of the chooser: not actually selecting a mode
+   scheme, but pricing a mode given all the modes selected so far.
+  The price is an opportunity cost: the bits needed to code every mode chosen
+   so far plus this one under the best possible scheme, minus the bits needed
+   to code every mode chosen so far without this one under the best possible
+   scheme.
+  The computational expense of doing this probably makes it overkill, but it
+   is still a greedy approach rather than an attempt to solve the global
+   mode-selection problem (which is NP-hard).
+  _mb_mode: The mode to determine the cost of.
+  Return: The number of bits required to code this mode.*/
+static int oc_mode_scheme_chooser_cost(oc_mode_scheme_chooser *_chooser,
+ int _mb_mode){
+  int best_scheme;
+  int cand_scheme;
+  int total_bits;
+  int cand_bits;
+  int cur_bits;
+  int li;
+  best_scheme=_chooser->scheme_list[0];
+  cand_scheme=_chooser->scheme_list[1];
+  total_bits=_chooser->scheme_bits[best_scheme];
+  cur_bits=OC_MODE_BITS[(best_scheme+1)>>3][
+   _chooser->mode_ranks[best_scheme][_mb_mode]];
+  /*Typical case: the runner-up trails the leader by more than 6 bits, so
+     adding just one mode cannot change which scheme we use.*/
+  if(_chooser->scheme_bits[cand_scheme]-total_bits>6)return cur_bits;
+  /*Otherwise, walk the schemes that are close enough to overtake the leader
+     and see if adding this mode makes one of them the best.*/
+  total_bits+=cur_bits;
+  for(li=1;;){
+    if(cand_scheme==0){
+      int rank;
+      /*For scheme 0, incrementing the mode count could potentially change
+         the mode's rank.
+        Find the index the mode would be moved to in the optimal list, and
+         use its bit cost instead of the one for the mode's current position.
+        The scheme bits are not recomputed; this is an opportunity cost, not
+         an update.*/
+      rank=_chooser->scheme0_ranks[_mb_mode];
+      while(rank>0&&_chooser->mode_counts[_mb_mode]>=
+       _chooser->mode_counts[_chooser->scheme0_list[rank-1]]){
+        rank--;
+      }
+      cand_bits=_chooser->scheme_bits[0]+OC_MODE_BITS[0][rank];
+    }
+    else{
+      /*Any other scheme has fixed ranks, so the mode's cost is a plain
+         lookup.*/
+      cand_bits=_chooser->scheme_bits[cand_scheme]+
+       OC_MODE_BITS[(cand_scheme+1)>>3][
+       _chooser->mode_ranks[cand_scheme][_mb_mode]];
+    }
+    if(cand_bits<total_bits)total_bits=cand_bits;
+    li++;
+    if(li>=8)break;
+    cand_scheme=_chooser->scheme_list[li];
+    if(_chooser->scheme_bits[cand_scheme]
+     -_chooser->scheme_bits[best_scheme]>6){
+      break;
+    }
+  }
+  return total_bits-_chooser->scheme_bits[best_scheme];
+}
+
+/*Record that a mode has been selected: bump its count, keep the scheme 0
+   mode list sorted by count, charge the mode's cost to every scheme, and
+   keep the scheme list sorted by total bits.
+  _mb_mode: The mode that was chosen.*/
+static void oc_mode_scheme_chooser_update(oc_mode_scheme_chooser *_chooser,
+ int _mb_mode){
+  int rank;
+  int scheme;
+  int pos;
+  _chooser->mode_counts[_mb_mode]++;
+  /*Move the mode up the scheme 0 list past every mode it now outnumbers.*/
+  rank=_chooser->scheme0_ranks[_mb_mode];
+  while(rank>0){
+    int above;
+    above=_chooser->scheme0_list[rank-1];
+    if(_chooser->mode_counts[above]>=_chooser->mode_counts[_mb_mode])break;
+    /*The displaced mode drops down one slot.*/
+    _chooser->scheme0_list[rank]=above;
+    _chooser->scheme0_ranks[above]++;
+    rank--;
+  }
+  _chooser->scheme0_list[rank]=_mb_mode;
+  _chooser->scheme0_ranks[_mb_mode]=rank;
+  /*Charge every scheme for the bits this mode costs under it.*/
+  for(scheme=0;scheme<8;scheme++){
+    int mode_rank;
+    mode_rank=_chooser->mode_ranks[scheme][_mb_mode];
+    _chooser->scheme_bits[scheme]+=OC_MODE_BITS[(scheme+1)>>3][mode_rank];
+  }
+  /*Insertion sort the scheme list by total bits, cheapest first.*/
+  for(pos=1;pos<8;pos++){
+    int moving;
+    int moving_bits;
+    int slot;
+    moving=_chooser->scheme_list[pos];
+    moving_bits=_chooser->scheme_bits[moving];
+    for(slot=pos;slot>0;slot--){
+      int prev;
+      prev=_chooser->scheme_list[slot-1];
+      if(moving_bits>=_chooser->scheme_bits[prev])break;
+      _chooser->scheme_list[slot]=prev;
+    }
+    _chooser->scheme_list[slot]=moving;
+  }
+}
+
+
+
+/*Look up the size of the code for a super block run.
+  The run length classes are searched in order until one is found whose
+   successor starts above _run_count.
+  _run_count: The desired run count; must be positive and less than 4130.
+  Return: The number of bits required to encode the run.*/
+static int oc_sb_run_bits(int _run_count){
+  int ci;
+  ci=0;
+  while(_run_count>=OC_SB_RUN_VAL_MIN[ci+1])ci++;
+  return OC_SB_RUN_CODE_NBITS[ci];
+}
+
+/*Look up the size of the code for a block run.
+  _run_count: The desired run count; must be positive and less than 30.
+  Return: The number of bits required to encode the run.*/
+static int oc_block_run_bits(int _run_count){
+  int idx;
+  /*The table is zero-based, while run counts start at one.*/
+  idx=_run_count-1;
+  return OC_BLOCK_RUN_CODE_NBITS[idx];
+}
+
+
+
+/*State to track coded block flags and their bit cost.
+  bits:               The running bit cost estimate.
+  sb_partial_count:   The length of the current run of sb_partial flags.
+  sb_full_count:      The length of the current run of sb_full flags.
+  b_coded_count_prev: The b_coded run length saved when the last super block
+                       was finished.
+  b_coded_count:      The length of the current run of b_coded flags.
+  b_count:            The number of blocks added to the current super block.
+  sb_partial, sb_full, b_coded_prev, b_coded: The flag value of the
+   corresponding run; initialized to -1, which matches no real flag value.*/
+struct oc_fr_state{
+  ptrdiff_t  bits;
+  unsigned   sb_partial_count:16;
+  unsigned   sb_full_count:16;
+  unsigned   b_coded_count_prev:8;
+  unsigned   b_coded_count:8;
+  unsigned   b_count:8;
+  signed int sb_partial:2;
+  signed int sb_full:2;
+  signed int b_coded_prev:2;
+  signed int b_coded:2;
+};
+
+
+
+/*Put a coded block flag tracker into its initial state: no bits spent, all
+   run lengths empty, and every flag value set to -1 so that the first real
+   flag always starts a new run.*/
+static void oc_fr_state_init(oc_fr_state *_fr){
+  /*Flag values.*/
+  _fr->b_coded=-1;
+  _fr->b_coded_prev=-1;
+  _fr->sb_full=-1;
+  _fr->sb_partial=-1;
+  /*Run lengths and the per-SB block count.*/
+  _fr->b_count=0;
+  _fr->b_coded_count=0;
+  _fr->b_coded_count_prev=0;
+  _fr->sb_full_count=0;
+  _fr->sb_partial_count=0;
+  /*Accumulated cost.*/
+  _fr->bits=0;
+}
+
+
+/*Accounts for the flags of one more super block in the running bit estimate.
+  The sb_partial flag is tracked for every super block; the sb_full flag is
+   only tracked for super blocks that are not partially coded.
+  Extending a run replaces the cost of the old run length with the cost of
+   the new one.
+  A run that has already reached a length of 4129 is not extended: one extra
+   bit is charged and a new run is started instead.
+  _sb_partial: Non-zero if the super block is partially coded.
+  _sb_full:    The coded flag shared by all blocks of the super block;
+                ignored when _sb_partial is non-zero.*/
+static void oc_fr_state_advance_sb(oc_fr_state *_fr,
+ int _sb_partial,int _sb_full){
+  ptrdiff_t bits;
+  int       sb_partial_count;
+  int       sb_full_count;
+  bits=_fr->bits;
+  /*Extend the sb_partial run, or start a new one.
+    The run length must be read from sb_partial_count, not from the
+     sb_partial flag value; otherwise runs would never be extended and
+     oc_sb_run_bits() could be asked for the cost of a zero-length run.*/
+  sb_partial_count=_fr->sb_partial_count;
+  if(_fr->sb_partial==_sb_partial){
+    if(sb_partial_count>=4129){
+      bits++;
+      sb_partial_count=0;
+    }
+    else bits-=oc_sb_run_bits(sb_partial_count);
+  }
+  else sb_partial_count=0;
+  sb_partial_count++;
+  bits+=oc_sb_run_bits(sb_partial_count);
+  if(!_sb_partial){
+    /*Extend the sb_full run, or start a new one.*/
+    sb_full_count=_fr->sb_full_count;
+    if(_fr->sb_full==_sb_full){
+      if(sb_full_count>=4129){
+        bits++;
+        sb_full_count=0;
+      }
+      else bits-=oc_sb_run_bits(sb_full_count);
+    }
+    else sb_full_count=0;
+    sb_full_count++;
+    bits+=oc_sb_run_bits(sb_full_count);
+    _fr->sb_full=_sb_full;
+    _fr->sb_full_count=sb_full_count;
+  }
+  _fr->bits=bits;
+  _fr->sb_partial=_sb_partial;
+  _fr->sb_partial_count=sb_partial_count;
+}
+
+/*Flush any outstanding block flags for a SB (e.g., one with fewer than 16
+   blocks).
+  Does nothing if no blocks have been added since the last SB was finished.
+  Otherwise the SB is classified as fully coded/uncoded or partially coded,
+   the block flag state is saved for the next SB, and the SB flags are
+   accounted for with oc_fr_state_advance_sb().*/
+static void oc_fr_state_flush_sb(oc_fr_state *_fr){
+  ptrdiff_t bits;
+  int       sb_partial;
+  int       sb_full;
+  int       b_coded_count;
+  int       b_coded;
+  int       b_count;
+  b_count=_fr->b_count;
+  if(b_count>0){
+    bits=_fr->bits;
+    b_coded=_fr->b_coded;
+    b_coded_count=_fr->b_coded_count;
+    /*sb_full is only examined for SBs that are not partially coded, but give
+       it a defined value so that an indeterminate one is never passed on.*/
+    sb_full=0;
+    if(b_coded_count>=b_count){
+      /*This SB was fully coded/uncoded; roll back the partial block flags.
+        If the run began before this SB, re-add the cost of the part that
+         belongs to earlier SBs.*/
+      bits-=oc_block_run_bits(b_coded_count);
+      if(b_coded_count>b_count)bits+=oc_block_run_bits(b_coded_count-b_count);
+      sb_partial=0;
+      sb_full=b_coded;
+      b_coded=_fr->b_coded_prev;
+      b_coded_count=_fr->b_coded_count_prev;
+    }
+    else{
+      /*It was partially coded.*/
+      sb_partial=1;
+      /*sb_full is unused.*/
+    }
+    _fr->bits=bits;
+    _fr->b_coded_count=b_coded_count;
+    _fr->b_coded_count_prev=b_coded_count;
+    _fr->b_count=0;
+    _fr->b_coded=b_coded;
+    _fr->b_coded_prev=b_coded;
+    oc_fr_state_advance_sb(_fr,sb_partial,sb_full);
+  }
+}
+
+/*Accounts for the coded flag of one more block in the running bit estimate.
+  The block either extends the current run of block flags or starts a new
+   one.
+  Once 16 blocks have been added the super block is finished: it is
+   classified as fully coded/uncoded or partially coded, the block flag state
+   is saved for the next super block, and the super block flags are accounted
+   for with oc_fr_state_advance_sb().
+  _b_coded: The coded flag of the block.*/
+static void oc_fr_state_advance_block(oc_fr_state *_fr,int _b_coded){
+  ptrdiff_t bits;
+  int       b_coded_count;
+  int       b_count;
+  int       sb_partial;
+  int       sb_full;
+  bits=_fr->bits;
+  /*Extend the b_coded run, or start a new one.*/
+  b_coded_count=_fr->b_coded_count;
+  if(_fr->b_coded==_b_coded)bits-=oc_block_run_bits(b_coded_count);
+  else b_coded_count=0;
+  b_coded_count++;
+  b_count=_fr->b_count+1;
+  if(b_count>=16){
+    /*We finished a superblock.*/
+    /*sb_full is only examined for SBs that are not partially coded, but give
+       it a defined value so that an indeterminate one is never passed on.*/
+    sb_full=0;
+    if(b_coded_count>=16){
+      /*It was fully coded/uncoded; roll back the partial block flags.
+        If the run began before this SB, re-add the cost of the part that
+         belongs to earlier SBs.*/
+      if(b_coded_count>16)bits+=oc_block_run_bits(b_coded_count-16);
+      sb_partial=0;
+      sb_full=_b_coded;
+      _b_coded=_fr->b_coded_prev;
+      b_coded_count=_fr->b_coded_count_prev;
+    }
+    else{
+      bits+=oc_block_run_bits(b_coded_count);
+      /*It was partially coded.*/
+      sb_partial=1;
+      /*sb_full is unused.*/
+    }
+    _fr->bits=bits;
+    _fr->b_coded_count=b_coded_count;
+    _fr->b_coded_count_prev=b_coded_count;
+    _fr->b_count=0;
+    _fr->b_coded=_b_coded;
+    _fr->b_coded_prev=_b_coded;
+    oc_fr_state_advance_sb(_fr,sb_partial,sb_full);
+  }
+  else{
+    /*Still inside the super block: just pay for the new run length.*/
+    bits+=oc_block_run_bits(b_coded_count);
+    _fr->bits=bits;
+    _fr->b_coded_count=b_coded_count;
+    _fr->b_count=b_count;
+    _fr->b_coded=_b_coded;
+  }
+}
+
+/*Account for one more block whose coded flag is 0.*/
+static void oc_fr_skip_block(oc_fr_state *_fr){
+  int b_coded;
+  b_coded=0;
+  oc_fr_state_advance_block(_fr,b_coded);
+}
+
+/*Account for one more block whose coded flag is 1.*/
+static void oc_fr_code_block(oc_fr_state *_fr){
+  int b_coded;
+  b_coded=1;
+  oc_fr_state_advance_block(_fr,b_coded);
+}
+
+/*Compare coding the next block against skipping it.
+  Both alternatives are tried on private copies; _fr itself is not modified.
+  Return: The bit estimate after coding the block minus the bit estimate
+           after skipping it.*/
+static int oc_fr_cost1(const oc_fr_state *_fr){
+  oc_fr_state skipped;
+  oc_fr_state coded;
+  skipped=*_fr;
+  oc_fr_skip_block(&skipped);
+  coded=*_fr;
+  oc_fr_code_block(&coded);
+  return (int)(coded.bits-skipped.bits);
+}
+
+/*Compare the flags actually produced for four blocks against skipping all
+   four of them.
+  _pre:  The state before the four blocks were added.
+  _post: The state after the four blocks were added.
+  Return: The bit estimate of _post minus the bit estimate _pre would have
+           after four skipped blocks.*/
+static int oc_fr_cost4(const oc_fr_state *_pre,const oc_fr_state *_post){
+  oc_fr_state skipped;
+  int         bi;
+  skipped=*_pre;
+  for(bi=0;bi<4;bi++)oc_fr_skip_block(&skipped);
+  return (int)(_post->bits-skipped.bits);
+}
+
+
+/*State to track the runs of per-block qii flags and their bit cost.
+  bits:       The running bit cost estimate.
+  qi01:       The value of the current "qii is non-zero" run.
+  qi01_count: The length of that run.
+  qi12:       The value of the current "qii is 2 rather than 1" run, which is
+               only advanced by blocks with a non-zero qii.
+  qi12_count: The length of that run.
+  Both flag values are initialized to -1, which matches no real flag value.*/
+struct oc_qii_state{
+  ptrdiff_t  bits;
+  unsigned   qi01_count:14;
+  signed int qi01:2;
+  unsigned   qi12_count:14;
+  signed int qi12:2;
+};
+
+
+
+/*Put a qii run tracker into its initial state: no bits spent, both run
+   lengths empty, and both flag values set to -1 so that the first real flag
+   always starts a new run.*/
+static void oc_qii_state_init(oc_qii_state *_qs){
+  /*Flag values.*/
+  _qs->qi12=-1;
+  _qs->qi01=-1;
+  /*Run lengths.*/
+  _qs->qi12_count=0;
+  _qs->qi01_count=0;
+  /*Accumulated cost.*/
+  _qs->bits=0;
+}
+
+
+/*Compute the qii run state that results from adding one more block.
+  Every block advances the qi01 run; only blocks with a non-zero _qii advance
+   the qi12 run.
+  Extending a run replaces the cost of the old run length with the cost of
+   the new one, except that a run which has already reached a length of 4129
+   is not extended: one extra bit is charged and a new run is started.
+  _qd:  The state to store the result in; may be the same object as _qs.
+  _qs:  The state before the block is added.
+  _qii: The qii of the block.*/
+static void oc_qii_state_advance(oc_qii_state *_qd,
+ const oc_qii_state *_qs,int _qii){
+  ptrdiff_t nbits;
+  int       flag01;
+  int       run01;
+  int       flag12;
+  int       run12;
+  /*Read everything we need up front, since _qd may alias _qs.*/
+  nbits=_qs->bits;
+  run01=_qs->qi01_count;
+  flag12=_qs->qi12;
+  run12=_qs->qi12_count;
+  flag01=(_qii+1)>>1;
+  if(flag01!=_qs->qi01)run01=0;
+  else if(run01<4129)nbits-=oc_sb_run_bits(run01);
+  else{
+    nbits++;
+    run01=0;
+  }
+  run01++;
+  nbits+=oc_sb_run_bits(run01);
+  if(_qii!=0){
+    int prev12;
+    prev12=flag12;
+    flag12=_qii>>1;
+    if(flag12!=prev12)run12=0;
+    else if(run12<4129)nbits-=oc_sb_run_bits(run12);
+    else{
+      nbits++;
+      run12=0;
+    }
+    run12++;
+    nbits+=oc_sb_run_bits(run12);
+  }
+  _qd->bits=nbits;
+  _qd->qi01=flag01;
+  _qd->qi01_count=run01;
+  _qd->qi12=flag12;
+  _qd->qi12_count=run12;
+}
+
+
+
+/*Temporary encoder state for the analysis pipeline.
+  bounding_values holds the bounding value array for the loop filter, while
+   fr and qs hold the per-plane coded block flag and qii run trackers, which
+   are used for bit estimation.*/
+struct oc_enc_pipeline_state{
+  int                 bounding_values[256];
+  oc_fr_state         fr[3];
+  oc_qii_state        qs[3];
+  /*Condensed dequantization tables, indexed by [pli][qii][qti].*/
+  const ogg_uint16_t *dequant[3][3][2];
+  /*Condensed quantization tables, indexed by [pli][qii][qti].*/
+  const oc_iquant    *enquant[3][3][2];
+  /*Skip SSD storage for the current MCU in each plane.*/
+  unsigned           *skip_ssd[3];
+  /*Coded/uncoded fragment lists for each plane for the current MCU.
+    The coded list grows upward from coded_fragis, while the uncoded list
+     grows downward from uncoded_fragis.*/
+  ptrdiff_t          *coded_fragis[3];
+  ptrdiff_t          *uncoded_fragis[3];
+  ptrdiff_t           ncoded_fragis[3];
+  ptrdiff_t           nuncoded_fragis[3];
+  /*The starting fragment for the current MCU in each plane.*/
+  ptrdiff_t           froffset[3];
+  /*The starting row for the current MCU in each plane.*/
+  int                 fragy0[3];
+  /*The ending row for the current MCU in each plane.*/
+  int                 fragy_end[3];
+  /*The starting superblock for the current MCU in each plane.*/
+  unsigned            sbi0[3];
+  /*The ending superblock for the current MCU in each plane.*/
+  unsigned            sbi_end[3];
+  /*The number of tokens for zzi=1 for each color plane.*/
+  int                 ndct_tokens1[3];
+  /*The outstanding eob_run count for zzi=1 for each color plane.*/
+  int                 eob_run1[3];
+  /*Whether or not the loop filter is enabled.*/
+  int                 loop_filter;
+};
+
+
+/*Prepare the pipeline state for the analysis of a frame.
+  This resets the bit-estimation trackers and the tokenization state,
+   partitions the skip SSD and fragment list storage among the three planes,
+   gathers the quantizer tables for the qi values in use, and initializes
+   the loop filter.*/
+static void oc_enc_pipeline_init(oc_enc_ctx *_enc,oc_enc_pipeline_state *_pipe){
+  ptrdiff_t *fragis;
+  ptrdiff_t  mcu_nfrags;
+  unsigned   mcu_nvsbs;
+  int        hdec;
+  int        vdec;
+  int        pli;
+  int        qii;
+  int        qti;
+  /*Reset the per-plane counters and trackers.
+    The coded block flag and qii trackers are used for bit-estimation purposes
+     only; the real flag bits span all three planes, so we can't compute them
+     in parallel.*/
+  for(pli=0;pli<3;pli++){
+    oc_fr_state_init(&_pipe->fr[pli]);
+    oc_qii_state_init(&_pipe->qs[pli]);
+    _pipe->ncoded_fragis[pli]=0;
+    _pipe->nuncoded_fragis[pli]=0;
+    _pipe->ndct_tokens1[pli]=0;
+    _pipe->eob_run1[pli]=0;
+  }
+  /*Carve the skip SSD buffer up into one region per plane.*/
+  mcu_nvsbs=_enc->mcu_nvsbs;
+  mcu_nfrags=mcu_nvsbs*_enc->state.fplanes[0].nhsbs*16;
+  hdec=!(_enc->state.info.pixel_fmt&1);
+  vdec=!(_enc->state.info.pixel_fmt&2);
+  _pipe->skip_ssd[0]=_enc->mcu_skip_ssd;
+  _pipe->skip_ssd[1]=_enc->mcu_skip_ssd+mcu_nfrags;
+  _pipe->skip_ssd[2]=_pipe->skip_ssd[1]+(mcu_nfrags>>(hdec+vdec));
+  /*Give each plane its own stretch of the fragment list.
+    Unlike the decoder, each plane's coded and uncoded fragment lists are kept
+     separate during the analysis stage: the coded list starts at the
+     beginning of the plane's stretch and the uncoded list at its end.
+    The coded lists for all three planes are only made contiguous right
+     before the final packet is output (destroying the uncoded lists, which
+     are no longer needed).*/
+  fragis=_enc->state.coded_fragis;
+  for(pli=0;pli<3;pli++){
+    _pipe->coded_fragis[pli]=fragis;
+    fragis+=_enc->state.fplanes[pli].nfrags;
+    _pipe->uncoded_fragis[pli]=fragis;
+  }
+  /*Collect the (de)quantization tables for just the qi values in use.*/
+  for(pli=0;pli<3;pli++)for(qii=0;qii<_enc->state.nqis;qii++){
+    int qi;
+    qi=_enc->state.qis[qii];
+    for(qti=2;qti-->0;){
+      _pipe->enquant[pli][qii][qti]=_enc->enquant_tables[qi][pli][qti];
+      _pipe->dequant[pli][qii][qti]=_enc->state.dequant_tables[qi][pli][qti];
+    }
+  }
+  /*Fill in the bounding value array for the loop filter; the filter is
+     enabled when the initialization returns zero.*/
+  _pipe->loop_filter=oc_state_loop_filter_init(&_enc->state,
+   _pipe->bounding_values)==0;
+}
+
+/*Sets the current MCU stripe to super block row _sby.
+  This fills in the starting and ending super block, the starting and ending
+   fragment row, and the starting fragment offset of the stripe for each of
+   the three planes.
+  Return: A non-zero value if more super block rows remain after this MCU,
+           or 0 if this was the last MCU.*/
+static int oc_enc_pipeline_set_stripe(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,int _sby){
+  const oc_fragment_plane *fplane;
+  unsigned                 mcu_nvsbs;
+  int                      sby_end;
+  int                      notdone;
+  int                      vdec;
+  int                      pli;
+  mcu_nvsbs=_enc->mcu_nvsbs;
+  sby_end=_enc->state.fplanes[0].nvsbs;
+  notdone=_sby+mcu_nvsbs<sby_end;
+  if(notdone)sby_end=_sby+mcu_nvsbs;
+  vdec=0; /*Plane 0 always uses a vertical shift of 0.*/
+  for(pli=0;pli<3;pli++){
+    fplane=_enc->state.fplanes+pli;
+    _pipe->sbi0[pli]=fplane->sboffset+(_sby>>vdec)*fplane->nhsbs;
+    _pipe->fragy0[pli]=_sby<<2-vdec;
+    _pipe->froffset[pli]=fplane->froffset
+     +_pipe->fragy0[pli]*(ptrdiff_t)fplane->nhfrags;
+    if(notdone){
+      _pipe->sbi_end[pli]=fplane->sboffset+(sby_end>>vdec)*fplane->nhsbs;
+      _pipe->fragy_end[pli]=sby_end<<2-vdec;
+    }
+    else{
+      _pipe->sbi_end[pli]=fplane->sboffset+fplane->nsbs;
+      _pipe->fragy_end[pli]=fplane->nvfrags;
+    }
+    vdec=!(_enc->state.info.pixel_fmt&2); /*Used for planes 1 and 2.*/
+  }
+  return notdone;
+}
+
+/*Finish the current MCU for one plane once all of its fragments have been
+   analyzed.
+  In order: copy the uncoded fragments, run DC prediction and DC
+   tokenization, advance the coded fragment list, apply the loop filter (if
+   enabled), and fill the borders.
+  _pli:    The plane to finish.
+  _sdelay: The number of fragment rows to hold back at the start of the
+            stripe for the loop filter.
+  _edelay: The number of fragment rows to hold back at the end of the stripe
+            for the loop filter.*/
+static void oc_enc_pipeline_finish_mcu_plane(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,int _pli,int _sdelay,int _edelay){
+  int refi;
+  int border_y0;
+  int border_yend;
+  /*The uncoded list grows downward, so step back to its first entry, copy
+     those fragments over, and then empty the list.*/
+  _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
+  oc_state_frag_copy_list(&_enc->state,_pipe->uncoded_fragis[_pli],
+   _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli);
+  _pipe->nuncoded_fragis[_pli]=0;
+  /*Perform DC prediction.*/
+  oc_enc_pred_dc_frag_rows(_enc,_pli,
+   _pipe->fragy0[_pli],_pipe->fragy_end[_pli]);
+  /*Finish DC tokenization, then remember where the zzi=1 token list and EOB
+     run stand for the next MCU.*/
+  oc_enc_tokenize_dc_frag_list(_enc,_pli,
+   _pipe->coded_fragis[_pli],_pipe->ncoded_fragis[_pli],
+   _pipe->ndct_tokens1[_pli],_pipe->eob_run1[_pli]);
+  _pipe->ndct_tokens1[_pli]=_enc->ndct_tokens[_pli][1];
+  _pipe->eob_run1[_pli]=_enc->eob_run[_pli][1];
+  /*And advance the coded fragment list.*/
+  _enc->state.ncoded_fragis[_pli]+=_pipe->ncoded_fragis[_pli];
+  _pipe->coded_fragis[_pli]+=_pipe->ncoded_fragis[_pli];
+  _pipe->ncoded_fragis[_pli]=0;
+  /*Apply the loop filter if necessary; without it there is nothing to wait
+     for, so the delays are dropped.*/
+  refi=_enc->state.ref_frame_idx[OC_FRAME_SELF];
+  if(!_pipe->loop_filter)_sdelay=_edelay=0;
+  else{
+    oc_state_loop_filter_frag_rows(&_enc->state,_pipe->bounding_values,
+     refi,_pli,_pipe->fragy0[_pli]-_sdelay,_pipe->fragy_end[_pli]-_edelay);
+  }
+  /*To fill borders, we have an additional two pixel delay, since a fragment
+     in the next row could filter its top edge, using two pixels from a
+     fragment in this row.
+    But there's no reason to delay a full fragment between the two.*/
+  border_y0=((_pipe->fragy0[_pli]-_sdelay)<<3)-(_sdelay<<1);
+  border_yend=((_pipe->fragy_end[_pli]-_edelay)<<3)-(_edelay<<1);
+  oc_state_borders_fill_rows(&_enc->state,refi,_pli,border_y0,border_yend);
+}
+
+
+
+/*Cost information about the coded blocks in a MB.
+  All fields are accumulated over the blocks that were coded.*/
+struct oc_rd_metric{
+  int uncoded_ac_ssd; /*Sum of the blocks' skip SSDs.*/
+  int coded_ac_ssd;   /*Sum of the blocks' AC SSDs after coding.*/
+  int ac_bits;        /*Sum of the blocks' AC token bits.*/
+  int dc_flag;        /*Set if any block had a skip SSD of UINT_MAX.*/
+};
+
+
+/*Transforms, quantizes, tokenizes, and reconstructs a single fragment, and
+   then, in non-intra frames, decides whether or not it should have been
+   coded at all.
+  _pli:           The color plane the fragment belongs to.
+  _fragi:         The index of the fragment.
+  _overhead_bits: The estimated cost of flagging this fragment as coded;
+                   negative values are treated as zero.
+  _mo:            Accumulates the SSD and AC bit statistics of the fragment
+                   if it is coded in a non-intra frame.
+  _stack:         The token checkpoint stack; it is restored to its value on
+                   entry if the tokens are rolled back.
+  Return: 1 if the fragment was coded, or 0 if it was skipped.*/
+static int oc_enc_block_transform_quantize(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,int _pli,ptrdiff_t _fragi,int _overhead_bits,
+ oc_rd_metric *_mo,oc_token_checkpoint **_stack){
+  OC_ALIGN16(ogg_int16_t  dct[64]);
+  OC_ALIGN16(ogg_int16_t  data[64]);
+  ogg_uint16_t            dc_dequant;
+  const ogg_uint16_t     *dequant;
+  const oc_iquant        *enquant;
+  ptrdiff_t               frag_offs;
+  int                     ystride;
+  const unsigned char    *src;
+  const unsigned char    *ref;
+  unsigned char          *dst;
+  int                     frame_type;
+  int                     nonzero;
+  unsigned                uncoded_ssd;
+  unsigned                coded_ssd;
+  int                     coded_dc;
+  oc_token_checkpoint    *checkpoint;
+  oc_fragment            *frags;
+  int                     mb_mode;
+  int                     mv_offs[2];
+  int                     nmv_offs;
+  int                     ac_bits;
+  int                     borderi;
+  int                     qti;
+  int                     qii;
+  int                     pi;
+  int                     zzi;
+  int                     v;
+  int                     val;
+  int                     d;
+  int                     s;
+  int                     dc;
+  frags=_enc->state.frags;
+  frag_offs=_enc->state.frag_buf_offs[_fragi];
+  ystride=_enc->state.ref_ystride[_pli];
+  src=_enc->state.ref_frame_data[OC_FRAME_IO]+frag_offs;
+  borderi=frags[_fragi].borderi;
+  qii=frags[_fragi].qii;
+  /*Any bits set above the low two mark the fragment as a skip candidate
+     (NOTE(review): presumably set during mode decision; confirm).*/
+  if(qii&~3){
+#if !defined(OC_COLLECT_METRICS)
+    if(_enc->sp_level>=OC_SP_LEVEL_EARLY_SKIP){
+      /*Enable early skip detection.*/
+      frags[_fragi].coded=0;
+      return 0;
+    }
+#endif
+    /*Try and code this block anyway.*/
+    qii&=3;
+    frags[_fragi].qii=qii;
+  }
+  mb_mode=frags[_fragi].mb_mode;
+  ref=_enc->state.ref_frame_data[
+   _enc->state.ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]+frag_offs;
+  dst=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_SELF]]
+   +frag_offs;
+  /*Motion compensation:
+    nmv_offs records how many reference offsets the predictor uses: 0 for
+     intra, 1 for a single reference block, and more than 1 when two
+     reference blocks are combined into dst first.*/
+  switch(mb_mode){
+    case OC_MODE_INTRA:{
+      nmv_offs=0;
+      oc_enc_frag_sub_128(_enc,data,src,ystride);
+    }break;
+    case OC_MODE_GOLDEN_NOMV:
+    case OC_MODE_INTER_NOMV:{
+      nmv_offs=1;
+      mv_offs[0]=0;
+      oc_enc_frag_sub(_enc,data,src,ref,ystride);
+    }break;
+    default:{
+      const oc_mv *frag_mvs;
+      frag_mvs=(const oc_mv *)_enc->state.frag_mvs;
+      nmv_offs=oc_state_get_mv_offsets(&_enc->state,mv_offs,_pli,
+       frag_mvs[_fragi][0],frag_mvs[_fragi][1]);
+      if(nmv_offs>1){
+        oc_enc_frag_copy2(_enc,dst,
+         ref+mv_offs[0],ref+mv_offs[1],ystride);
+        oc_enc_frag_sub(_enc,data,src,dst,ystride);
+      }
+      else oc_enc_frag_sub(_enc,data,src,ref+mv_offs[0],ystride);
+    }break;
+  }
+#if defined(OC_COLLECT_METRICS)
+  {
+    unsigned satd;
+    switch(nmv_offs){
+      case 0:satd=oc_enc_frag_intra_satd(_enc,src,ystride);break;
+      case 1:{
+        satd=oc_enc_frag_satd_thresh(_enc,src,ref+mv_offs[0],ystride,UINT_MAX);
+      }break;
+      default:{
+        satd=oc_enc_frag_satd_thresh(_enc,src,dst,ystride,UINT_MAX);
+      }
+    }
+    _enc->frag_satd[_fragi]=satd;
+  }
+#endif
+  /*Transform:*/
+  oc_enc_fdct8x8(_enc,dct,data);
+  /*Quantize the DC coefficient:
+    The DC coefficient always uses the tables for qii 0.*/
+  qti=mb_mode!=OC_MODE_INTRA; /*0 for intra, 1 for everything else.*/
+  enquant=_pipe->enquant[_pli][0][qti];
+  dc_dequant=_pipe->dequant[_pli][0][qti][0];
+  v=dct[0];
+  val=v<<1;
+  s=OC_SIGNMASK(val);
+  val+=dc_dequant+s^s;
+  val=((enquant[0].m*(ogg_int32_t)val>>16)+val>>enquant[0].l)-s;
+  dc=OC_CLAMPI(-580,val,580);
+  nonzero=0;
+  /*Quantize the AC coefficients:
+    nonzero tracks the zig-zag index of the last coefficient that did not
+     quantize to zero by the threshold test below.*/
+  dequant=_pipe->dequant[_pli][qii][qti];
+  enquant=_pipe->enquant[_pli][qii][qti];
+  for(zzi=1;zzi<64;zzi++){
+    v=dct[OC_FZIG_ZAG[zzi]];
+    d=dequant[zzi];
+    val=v<<1;
+    v=abs(val);
+    if(v>=d){
+      s=OC_SIGNMASK(val);
+      /*The bias added here rounds ties away from zero, since token
+         optimization can only decrease the magnitude of the quantized
+         value.*/
+      val+=d+s^s;
+      /*Note the arithmetic right shift is not guaranteed by ANSI C.
+        Hopefully no one still uses ones-complement architectures.*/
+      val=((enquant[zzi].m*(ogg_int32_t)val>>16)+val>>enquant[zzi].l)-s;
+      data[zzi]=OC_CLAMPI(-580,val,580);
+      nonzero=zzi;
+    }
+    else data[zzi]=0;
+  }
+  /*Tokenize.
+    Remember where the checkpoint stack stood so the tokens can be rolled
+     back if the block ends up skipped.*/
+  checkpoint=*_stack;
+  ac_bits=oc_enc_tokenize_ac(_enc,_pli,_fragi,data,dequant,dct,nonzero+1,
+   _stack,qti?0:3);
+  /*Reconstruct.
+    TODO: nonzero may need to be adjusted after tokenization.*/
+  if(nonzero==0){
+    ogg_int16_t p;
+    int         ci;
+    /*We round this dequant product (and not any of the others) because there's
+       no iDCT rounding.*/
+    p=(ogg_int16_t)(dc*(ogg_int32_t)dc_dequant+15>>5);
+    /*LOOP VECTORIZES.*/
+    for(ci=0;ci<64;ci++)data[ci]=p;
+  }
+  else{
+    data[0]=dc*dc_dequant;
+    oc_idct8x8(&_enc->state,data,nonzero+1);
+  }
+  if(!qti)oc_enc_frag_recon_intra(_enc,dst,ystride,data);
+  else{
+    oc_enc_frag_recon_inter(_enc,dst,
+     nmv_offs==1?ref+mv_offs[0]:dst,ystride,data);
+  }
+  frame_type=_enc->state.frame_type;
+#if !defined(OC_COLLECT_METRICS)
+  if(frame_type!=OC_INTRA_FRAME)
+#endif
+  {
+    /*In retrospect, should we have skipped this block?
+      Measure the error of the reconstruction; for fragments with a border
+       mask only the pixels whose mask bit is set are counted.*/
+    oc_enc_frag_sub(_enc,data,src,dst,ystride);
+    coded_ssd=coded_dc=0;
+    if(borderi<0){
+      for(pi=0;pi<64;pi++){
+        coded_ssd+=data[pi]*data[pi];
+        coded_dc+=data[pi];
+      }
+    }
+    else{
+      ogg_int64_t mask;
+      mask=_enc->state.borders[borderi].mask;
+      for(pi=0;pi<64;pi++,mask>>=1)if(mask&1){
+        coded_ssd+=data[pi]*data[pi];
+        coded_dc+=data[pi];
+      }
+    }
+    /*Scale to match DCT domain.*/
+    coded_ssd<<=4;
+    /*We actually only want the AC contribution to the SSD.*/
+    coded_ssd-=coded_dc*coded_dc>>2;
+#if defined(OC_COLLECT_METRICS)
+    _enc->frag_ssd[_fragi]=coded_ssd;
+  }
+  if(frame_type!=OC_INTRA_FRAME){
+#endif
+    uncoded_ssd=_pipe->skip_ssd[_pli][_fragi-_pipe->froffset[_pli]];
+    if(uncoded_ssd<UINT_MAX){
+      /*Although the fragment coding overhead determination is accurate, it is
+         greedy, using very coarse-grained local information.
+        Allowing it to mildly discourage coding turns out to be beneficial, but
+         it's not clear that allowing it to encourage coding through negative
+         coding overhead deltas is useful.
+        For that reason, we disallow negative coding_overheads.*/
+      if(_overhead_bits<0)_overhead_bits=0;
+      if(uncoded_ssd<=coded_ssd+(_overhead_bits+ac_bits)*_enc->lambda&&
+       /*Don't allow luma blocks to be skipped in 4MV mode when VP3
+          compatibility is enabled.*/
+       (!_enc->vp3_compatible||mb_mode!=OC_MODE_INTER_MV_FOUR||_pli)){
+        /*Hm, not worth it; roll back.*/
+        oc_enc_tokenlog_rollback(_enc,checkpoint,(*_stack)-checkpoint);
+        *_stack=checkpoint;
+        frags[_fragi].coded=0;
+        return 0;
+      }
+    }
+    else _mo->dc_flag=1; /*The skip SSD was UINT_MAX.*/
+    _mo->uncoded_ac_ssd+=uncoded_ssd;
+    _mo->coded_ac_ssd+=coded_ssd;
+    _mo->ac_bits+=ac_bits;
+  }
+  oc_qii_state_advance(_pipe->qs+_pli,_pipe->qs+_pli,qii);
+  frags[_fragi].dc=dc;
+  frags[_fragi].coded=1;
+  return 1;
+}
+/*Transforms and quantizes the four luma blocks of a macro block, then, in
+   non-intra frames, decides whether coding them is still worthwhile once
+   the macro block level overhead is included.
+  _mbi:           The index of the macro block.
+  _mode_overhead: The estimated bit overhead of the macro block (the comments
+                   below describe it as mode and MV overhead).
+  Return: The number of luma blocks that ended up coded.*/
+static int oc_enc_mb_transform_quantize_luma(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,unsigned _mbi,int _mode_overhead){
+  /*Worst case token stack usage for 4 fragments.*/
+  oc_token_checkpoint  stack[64*4];
+  oc_token_checkpoint *stackptr;
+  const oc_sb_map     *sb_maps;
+  signed char         *mb_modes;
+  oc_fragment         *frags;
+  ptrdiff_t           *coded_fragis;
+  ptrdiff_t            ncoded_fragis;
+  ptrdiff_t           *uncoded_fragis;
+  ptrdiff_t            nuncoded_fragis;
+  oc_rd_metric         mo;
+  oc_fr_state          fr_checkpoint;
+  oc_qii_state         qs_checkpoint;
+  int                  mb_mode;
+  int                  ncoded;
+  ptrdiff_t            fragi;
+  int                  bi;
+  /*Save the luma trackers so the whole MB can be rolled back later.*/
+  *&fr_checkpoint=*(_pipe->fr+0);
+  *&qs_checkpoint=*(_pipe->qs+0);
+  sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
+  mb_modes=_enc->state.mb_modes;
+  frags=_enc->state.frags;
+  coded_fragis=_pipe->coded_fragis[0];
+  ncoded_fragis=_pipe->ncoded_fragis[0];
+  uncoded_fragis=_pipe->uncoded_fragis[0];
+  nuncoded_fragis=_pipe->nuncoded_fragis[0];
+  mb_mode=mb_modes[_mbi];
+  ncoded=0;
+  stackptr=stack;
+  memset(&mo,0,sizeof(mo));
+  for(bi=0;bi<4;bi++){
+    fragi=sb_maps[_mbi>>2][_mbi&3][bi];
+    frags[fragi].mb_mode=mb_mode;
+    if(oc_enc_block_transform_quantize(_enc,
+     _pipe,0,fragi,oc_fr_cost1(_pipe->fr+0),&mo,&stackptr)){
+      oc_fr_code_block(_pipe->fr+0);
+      coded_fragis[ncoded_fragis++]=fragi;
+      ncoded++;
+    }
+    else{
+      *(uncoded_fragis-++nuncoded_fragis)=fragi; /*This list grows downward.*/
+      oc_fr_skip_block(_pipe->fr+0);
+    }
+  }
+  if(_enc->state.frame_type!=OC_INTRA_FRAME){
+    if(ncoded>0&&!mo.dc_flag){
+      int cost;
+      /*Some individual blocks were worth coding.
+        See if that's still true when accounting for mode and MV overhead.*/
+      cost=mo.coded_ac_ssd+_enc->lambda*(mo.ac_bits
+       +oc_fr_cost4(&fr_checkpoint,_pipe->fr+0)+_mode_overhead);
+      if(mo.uncoded_ac_ssd<=cost){
+        /*Taking macroblock overhead into account, it is not worth coding this
+           MB.
+          Undo the tokens and tracker updates, then account for all four
+           blocks as skipped.*/
+        oc_enc_tokenlog_rollback(_enc,stack,stackptr-stack);
+        *(_pipe->fr+0)=*&fr_checkpoint;
+        *(_pipe->qs+0)=*&qs_checkpoint;
+        for(bi=0;bi<4;bi++){
+          fragi=sb_maps[_mbi>>2][_mbi&3][bi];
+          if(frags[fragi].coded){
+            *(uncoded_fragis-++nuncoded_fragis)=fragi;
+            frags[fragi].coded=0;
+          }
+          oc_fr_skip_block(_pipe->fr+0);
+        }
+        ncoded_fragis-=ncoded;
+        ncoded=0;
+      }
+    }
+    /*If no luma blocks coded, the mode is forced.*/
+    if(ncoded==0)mb_modes[_mbi]=OC_MODE_INTER_NOMV;
+    /*Assume that a 1MV with a single coded block is always cheaper than a 4MV
+       with a single coded block.
+      This may not be strictly true: a 4MV computes chroma MVs using (0,0) for
+       skipped blocks, while a 1MV does not.*/
+    else if(ncoded==1&&mb_mode==OC_MODE_INTER_MV_FOUR){
+      mb_modes[_mbi]=OC_MODE_INTER_MV;
+    }
+  }
+  _pipe->ncoded_fragis[0]=ncoded_fragis;
+  _pipe->nuncoded_fragis[0]=nuncoded_fragis;
+  return ncoded;
+}
+/*Transforms and quantizes every fragment of plane _pli in the super blocks
+   [_sbi_start,_sbi_end), adding each one to the plane's coded or uncoded
+   fragment list, and records the resulting flags of each super block.
+  _pli:       The color plane to process.
+  _sbi_start: The index of the first super block to process.
+  _sbi_end:   The index one past the last super block to process.*/
+static void oc_enc_sb_transform_quantize_chroma(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,int _pli,int _sbi_start,int _sbi_end){
+  const oc_sb_map *sb_maps;
+  oc_sb_flags     *sb_flags;
+  ptrdiff_t       *coded_fragis;
+  ptrdiff_t        ncoded_fragis;
+  ptrdiff_t       *uncoded_fragis;
+  ptrdiff_t        nuncoded_fragis;
+  int              sbi;
+  sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
+  sb_flags=_enc->state.sb_flags;
+  coded_fragis=_pipe->coded_fragis[_pli];
+  ncoded_fragis=_pipe->ncoded_fragis[_pli];
+  uncoded_fragis=_pipe->uncoded_fragis[_pli];
+  nuncoded_fragis=_pipe->nuncoded_fragis[_pli];
+  for(sbi=_sbi_start;sbi<_sbi_end;sbi++){
+    /*Worst case token stack usage for 1 fragment.*/
+    oc_token_checkpoint stack[64];
+    oc_rd_metric        mo;
+    int                 quadi;
+    int                 bi;
+    memset(&mo,0,sizeof(mo));
+    for(quadi=0;quadi<4;quadi++)for(bi=0;bi<4;bi++){
+      ptrdiff_t fragi;
+      fragi=sb_maps[sbi][quadi][bi];
+      /*Negative entries in the super block map are skipped.*/
+      if(fragi>=0){
+        oc_token_checkpoint *stackptr;
+        stackptr=stack;
+        if(oc_enc_block_transform_quantize(_enc,
+         _pipe,_pli,fragi,oc_fr_cost1(_pipe->fr+_pli),&mo,&stackptr)){
+          coded_fragis[ncoded_fragis++]=fragi;
+          oc_fr_code_block(_pipe->fr+_pli);
+        }
+        else{
+          *(uncoded_fragis-++nuncoded_fragis)=fragi; /*Grows downward.*/
+          oc_fr_skip_block(_pipe->fr+_pli);
+        }
+      }
+    }
+    /*Finish off super blocks with fewer than 16 blocks, then record the flags
+       the tracker holds for this super block.*/
+    oc_fr_state_flush_sb(_pipe->fr+_pli);
+    sb_flags[sbi].coded_fully=_pipe->fr[_pli].sb_full;
+    sb_flags[sbi].coded_partially=_pipe->fr[_pli].sb_partial;
+  }
+  _pipe->ncoded_fragis[_pli]=ncoded_fragis;
+  _pipe->nuncoded_fragis[_pli]=nuncoded_fragis;
+}
+
+/*Mode decision is done by exhaustively examining all potential choices.
+  Obviously, doing the motion compensation, fDCT, tokenization, and then
+   counting the bits each token uses is computationally expensive.
+  Theora's EOB runs can also split the cost of these tokens across multiple
+   fragments, and naturally we don't know what the optimal choice of Huffman
+   codes will be until we know all the tokens we're going to encode in all the
+   fragments.
+  So we use a simple approach to estimating the bit cost and distortion of each
+   mode based upon the SATD value of the residual before coding.
+  The mathematics behind the technique are outlined by Kim \cite{Kim03}, but
+   the process (modified somewhat from that of the paper) is very simple.
+  We build a non-linear regression of the mappings from
+   (pre-transform+quantization) SATD to (post-transform+quantization) bits and
+   SSD for each qi.
+  A separate set of mappings is kept for each quantization type and color
+   plane.
+  The mappings are constructed by partitioning the SATD values into a small
+   number of bins (currently 24) and using a linear regression in each bin
+   (as opposed to the 0th-order regression used by Kim).
+  The bit counts and SSD measurements are obtained by examining actual encoded
+   frames, with appropriate lambda values and optimal Huffman codes selected.
+  EOB bits are assigned to the fragment that started the EOB run (as opposed to
+   dividing them among all the blocks in the run; though the latter approach
+   seems more theoretically correct, Monty's testing showed a small improvement
+   with the former, though that may have been merely statistical noise).
+
+  @ARTICLE{Kim03,
+    author="Hyun Mun Kim",
+    title="Adaptive Rate Control Using Nonlinear Regression",
+    journal="IEEE Transactions on Circuits and Systems for Video Technology",
+    volume=13,
+    number=5,
+    pages="432--439",
+    month=May,
+    year=2003
+  }*/
+
+/*Computes (_ssd+_lambda*_rate)/(1<<OC_BIT_SCALE) with rounding, avoiding
+   overflow for large lambda values.
+  The whole and fractional parts of _ssd and _rate (relative to OC_BIT_SCALE)
+   are combined separately, and only the fractional sum is rounded.
+  _ssd and _rate are each evaluated twice, so they must not have side effects.
+  The expansion is fully parenthesized so that it can be used safely inside a
+   larger expression.*/
+#define OC_MODE_RD_COST(_ssd,_rate,_lambda) \
+ (((_ssd)>>OC_BIT_SCALE)+((_rate)>>OC_BIT_SCALE)*(_lambda) \
+ +(((_ssd)&(1<<OC_BIT_SCALE)-1)+((_rate)&(1<<OC_BIT_SCALE)-1)*(_lambda) \
+ +((1<<OC_BIT_SCALE)>>1)>>OC_BIT_SCALE))
+
+/*Estimate the R-D cost of the DCT coefficients given the SATD of a block after
+   prediction.
+  The estimate linearly interpolates between the OC_MODE_RD table entry for
+   the bin containing the (scaled) SATD and the entry for the next bin.
+  _ssd:  Returns the distortion estimate: the square of the interpolated RMSE
+          entry, shifted right by 2*OC_RMSE_SCALE-OC_BIT_SCALE.
+  _qi:   The quantizer index selecting the table to use.
+  _pli:  The color plane index.
+  _qti:  The quantization type (callers in this file pass 0 for intra modes
+          and 1 for inter modes).
+  _satd: The SATD of the block.
+  Return: The interpolated rate estimate, clamped to be non-negative.
+          Callers add bit counts shifted left by OC_BIT_SCALE to this value.*/
+static unsigned oc_dct_cost2(unsigned *_ssd,
+ int _qi,int _pli,int _qti,int _satd){
+  unsigned rmse;
+  int      bin;
+  int      dx;
+  int      y0;
+  int      z0;
+  int      dy;
+  int      dz;
+  /*SATD metrics for chroma planes vary much less than luma, so we scale them
+     by 4 to distribute them into the mode decision bins more evenly.
+    The shift amount parses as (_pli+1)&2: 0 for plane 0 and 2 for planes 1
+     and 2.*/
+  _satd<<=_pli+1&2;
+  bin=OC_MINI(_satd>>OC_SAD_SHIFT,OC_SAD_BINS-2); /*bin+1 is read below.*/
+  dx=_satd-(bin<<OC_SAD_SHIFT); /*Offset of the SATD from the start of bin.*/
+  y0=OC_MODE_RD[_qi][_pli][_qti][bin].rate;
+  z0=OC_MODE_RD[_qi][_pli][_qti][bin].rmse;
+  dy=OC_MODE_RD[_qi][_pli][_qti][bin+1].rate-y0;
+  dz=OC_MODE_RD[_qi][_pli][_qti][bin+1].rmse-z0;
+  rmse=OC_MAXI(z0+(dz*dx>>OC_SAD_SHIFT),0); /*Clamped to be non-negative.*/
+  *_ssd=rmse*rmse>>2*OC_RMSE_SCALE-OC_BIT_SCALE;
+  return OC_MAXI(y0+(dy*dx>>OC_SAD_SHIFT),0);
+}
+
+/*Select luma block-level quantizers for a MB in an INTRA frame.
+  For every block after the first and every candidate qii, the cheapest chain
+   of choices for the preceding blocks is extended by that qii; the cheapest
+   complete chain is then traced back and stored in the fragments' qii fields.
+  _qs:  The qii state before this MB (read only).
+  _mbi: The index of the MB.
+  Return: The estimated R-D cost of the cheapest chain.*/
+static unsigned oc_analyze_intra_mb_luma(oc_enc_ctx *_enc,
+ const oc_qii_state *_qs,unsigned _mbi){
+  const unsigned char *src;
+  const ptrdiff_t     *frag_buf_offs;
+  const oc_sb_map     *sb_maps;
+  oc_fragment         *frags;
+  ptrdiff_t            frag_offs;
+  ptrdiff_t            fragi;
+  oc_qii_state         qs[4][3];
+  unsigned             cost[4][3];
+  unsigned             ssd[4][3];
+  unsigned             rate[4][3];
+  int                  prev[3][3]; /*prev[bi-1][qii]: qii chosen for block bi-1.*/
+  unsigned             satd;
+  unsigned             best_cost;
+  unsigned             best_ssd;
+  unsigned             best_rate;
+  int                  best_qii;
+  int                  qii;
+  int                  lambda;
+  int                  ystride;
+  int                  nqis;
+  int                  bi;
+  frag_buf_offs=_enc->state.frag_buf_offs;
+  sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  ystride=_enc->state.ref_ystride[0];
+  fragi=sb_maps[_mbi>>2][_mbi&3][0];
+  frag_offs=frag_buf_offs[fragi];
+  satd=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride);
+  nqis=_enc->state.nqis;
+  lambda=_enc->lambda;
+  for(qii=0;qii<nqis;qii++){ /*The first block has no predecessor to choose.*/
+    oc_qii_state_advance(qs[0]+qii,_qs,qii);
+    rate[0][qii]=oc_dct_cost2(ssd[0]+qii,_enc->state.qis[qii],0,0,satd)
+     +(qs[0][qii].bits-_qs->bits<<OC_BIT_SCALE);
+    cost[0][qii]=OC_MODE_RD_COST(ssd[0][qii],rate[0][qii],lambda);
+  }
+  for(bi=1;bi<4;bi++){
+    fragi=sb_maps[_mbi>>2][_mbi&3][bi];
+    frag_offs=frag_buf_offs[fragi];
+    satd=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride);
+    for(qii=0;qii<nqis;qii++){
+      oc_qii_state qt[3];
+      unsigned     cur_ssd;
+      unsigned     cur_rate;
+      int          best_qij;
+      int          qij;
+      oc_qii_state_advance(qt+0,qs[bi-1]+0,qii);
+      cur_rate=oc_dct_cost2(&cur_ssd,_enc->state.qis[qii],0,0,satd);
+      best_ssd=ssd[bi-1][0]+cur_ssd;
+      best_rate=rate[bi-1][0]+cur_rate
+       +(qt[0].bits-qs[bi-1][0].bits<<OC_BIT_SCALE);
+      best_cost=OC_MODE_RD_COST(best_ssd,best_rate,lambda);
+      best_qij=0;
+      for(qij=1;qij<nqis;qij++){ /*Try each other qii for the previous block.*/
+        unsigned chain_ssd;
+        unsigned chain_rate;
+        unsigned chain_cost;
+        oc_qii_state_advance(qt+qij,qs[bi-1]+qij,qii);
+        chain_ssd=ssd[bi-1][qij]+cur_ssd;
+        chain_rate=rate[bi-1][qij]+cur_rate
+         +(qt[qij].bits-qs[bi-1][qij].bits<<OC_BIT_SCALE);
+        chain_cost=OC_MODE_RD_COST(chain_ssd,chain_rate,lambda);
+        if(chain_cost<best_cost){
+          best_cost=chain_cost;
+          best_ssd=chain_ssd;
+          best_rate=chain_rate;
+          best_qij=qij;
+        }
+      }
+      *(qs[bi]+qii)=*(qt+best_qij); /*Keep the state of the winning chain.*/
+      cost[bi][qii]=best_cost;
+      ssd[bi][qii]=best_ssd;
+      rate[bi][qii]=best_rate;
+      prev[bi-1][qii]=best_qij; /*Saved for the traceback below.*/
+    }
+  }
+  best_qii=0;
+  best_cost=cost[3][0];
+  for(qii=1;qii<nqis;qii++){ /*Pick the cheapest chain ending at block 3.*/
+    if(cost[3][qii]<best_cost){
+      best_cost=cost[3][qii];
+      best_qii=qii;
+    }
+  }
+  frags=_enc->state.frags;
+  for(bi=3;;){ /*Trace the winning chain back from block 3 to block 0.*/
+    fragi=sb_maps[_mbi>>2][_mbi&3][bi];
+    frags[fragi].qii=best_qii;
+    if(bi--<=0)break;
+    best_qii=prev[bi][best_qii];
+  }
+  return best_cost;
+}
+
+/*Select a block-level quantizer for a single chroma block in an INTRA frame.
+  Every available qii is costed from the block's intra SATD plus the bits
+   needed to signal that qii, and the cheapest one (the lowest index on ties)
+   is stored in the fragment.
+  _qs:    The qii state before this block (read only).
+  _pli:   The color plane index.
+  _fragi: The index of the fragment.
+  Return: The estimated R-D cost of the selected qii.*/
+static unsigned oc_analyze_intra_chroma_block(oc_enc_ctx *_enc,
+ const oc_qii_state *_qs,int _pli,ptrdiff_t _fragi){
+  const unsigned char *blk;
+  unsigned             blk_satd;
+  unsigned             min_cost;
+  int                  min_qii;
+  int                  lambda;
+  int                  nqis;
+  int                  qii;
+  blk=_enc->state.ref_frame_data[OC_FRAME_IO];
+  blk+=_enc->state.frag_buf_offs[_fragi];
+  blk_satd=oc_enc_frag_intra_satd(_enc,blk,_enc->state.ref_ystride[_pli]);
+  nqis=_enc->state.nqis;
+  lambda=_enc->lambda;
+  min_qii=0;
+  for(qii=0;qii<nqis;qii++){
+    oc_qii_state next_qs;
+    unsigned     blk_ssd;
+    unsigned     blk_rate;
+    unsigned     blk_cost;
+    oc_qii_state_advance(&next_qs,_qs,qii);
+    blk_rate=oc_dct_cost2(&blk_ssd,_enc->state.qis[qii],_pli,0,blk_satd)
+     +(next_qs.bits-_qs->bits<<OC_BIT_SCALE);
+    blk_cost=OC_MODE_RD_COST(blk_ssd,blk_rate,lambda);
+    /*The first candidate seeds the minimum; later ones must be strictly
+       cheaper to replace it.*/
+    if(qii==0||blk_cost<min_cost){
+      min_cost=blk_cost;
+      min_qii=qii;
+    }
+  }
+  _enc->state.frags[_fragi].qii=min_qii;
+  return min_cost;
+}
+
+/*Selects a quantizer for, then transforms, quantizes, and tokenizes every
+   fragment of one chroma plane in a range of super blocks of an INTRA frame.
+  Every fragment processed is appended to the plane's coded list.
+  _pli:       The color plane index.
+  _sbi_start: The first super block to process.
+  _sbi_end:   One past the last super block to process.*/
+static void oc_enc_sb_transform_quantize_intra_chroma(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,int _pli,int _sbi_start,int _sbi_end){
+  const oc_sb_map *maps;
+  ptrdiff_t       *coded_list;
+  ptrdiff_t        ncoded;
+  int              sbi;
+  maps=(const oc_sb_map *)_enc->state.sb_maps;
+  coded_list=_pipe->coded_fragis[_pli];
+  /*Work on a local copy of the count; it is written back at the end.*/
+  ncoded=_pipe->ncoded_fragis[_pli];
+  for(sbi=_sbi_start;sbi<_sbi_end;sbi++){
+    /*Worst case token stack usage for 1 fragment.*/
+    oc_token_checkpoint stack[64];
+    int                 blki;
+    /*blki>>2 is the quadrant and blki&3 is the block within it.*/
+    for(blki=0;blki<16;blki++){
+      oc_token_checkpoint *stackptr;
+      ptrdiff_t            fragi;
+      fragi=maps[sbi][blki>>2][blki&3];
+      /*Negative map entries are not processed.*/
+      if(fragi<0)continue;
+      oc_analyze_intra_chroma_block(_enc,_pipe->qs+_pli,_pli,fragi);
+      stackptr=stack;
+      oc_enc_block_transform_quantize(_enc,
+       _pipe,_pli,fragi,0,NULL,&stackptr);
+      coded_list[ncoded]=fragi;
+      ncoded++;
+    }
+  }
+  _pipe->ncoded_fragis[_pli]=ncoded;
+}
+
+/*Analysis stage for an INTRA frame.
+  The frame is processed one stripe of super block rows at a time: for each
+   stripe every valid MB has its luma quantizers selected and its luma blocks
+   transformed and quantized in OC_MODE_INTRA, then the two chroma planes of
+   the stripe are coded.
+  _recode: If non-zero, the motion search is not run again.*/
+void oc_enc_analyze_intra(oc_enc_ctx *_enc,int _recode){
+  oc_enc_pipeline_state   pipe;
+  const unsigned char    *map_idxs;
+  int                     nmap_idxs;
+  oc_sb_flags            *sb_flags;
+  signed char            *mb_modes;
+  const oc_mb_map        *mb_maps;
+  oc_mb_enc_info         *embs;
+  oc_fragment            *frags;
+  unsigned                stripe_sby;
+  unsigned                mcu_nvsbs;
+  int                     notstart;
+  int                     notdone;
+  int                     refi;
+  int                     pli;
+  _enc->state.frame_type=OC_INTRA_FRAME;
+  oc_enc_tokenize_start(_enc);
+  oc_enc_pipeline_init(_enc,&pipe);
+  /*Choose MVs and MB modes and quantize and code luma.
+    Must be done in Hilbert order.*/
+  map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+  nmap_idxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  _enc->state.ncoded_fragis[0]=0;
+  _enc->state.ncoded_fragis[1]=0;
+  _enc->state.ncoded_fragis[2]=0;
+  sb_flags=_enc->state.sb_flags;
+  mb_modes=_enc->state.mb_modes;
+  mb_maps=(const oc_mb_map *)_enc->state.mb_maps;
+  embs=_enc->mb_info; /*Not referenced again in this function.*/
+  frags=_enc->state.frags;
+  notstart=0;
+  notdone=1;
+  mcu_nvsbs=_enc->mcu_nvsbs;
+  for(stripe_sby=0;notdone;stripe_sby+=mcu_nvsbs){
+    unsigned sbi;
+    unsigned sbi_end;
+    notdone=oc_enc_pipeline_set_stripe(_enc,&pipe,stripe_sby);
+    sbi_end=pipe.sbi_end[0];
+    for(sbi=pipe.sbi0[0];sbi<sbi_end;sbi++){
+      int quadi;
+      /*Mode addressing is through Y plane, always 4 MB per SB.*/
+      for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
+        unsigned  mbi;
+        int       mapii;
+        int       mapi;
+        int       bi;
+        ptrdiff_t fragi;
+        mbi=sbi<<2|quadi;
+        /*Motion estimation:
+          We always do a basic 1MV search for all macroblocks, coded or not,
+           keyframe or not.
+          The search is skipped when recoding and for the very first frame.*/
+        if(!_recode&&_enc->state.curframe_num>0)oc_mcenc_search(_enc,mbi);
+        oc_analyze_intra_mb_luma(_enc,pipe.qs+0,mbi);
+        mb_modes[mbi]=OC_MODE_INTRA;
+        oc_enc_mb_transform_quantize_luma(_enc,&pipe,mbi,0);
+        /*Propagate final MB mode and MVs to the chroma blocks.*/
+        for(mapii=4;mapii<nmap_idxs;mapii++){
+          mapi=map_idxs[mapii];
+          pli=mapi>>2;
+          bi=mapi&3;
+          fragi=mb_maps[mbi][pli][bi];
+          frags[fragi].mb_mode=OC_MODE_INTRA;
+        }
+      }
+    }
+    oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,0,notstart,notdone);
+    /*Code chroma planes.*/
+    for(pli=1;pli<3;pli++){
+      oc_enc_sb_transform_quantize_intra_chroma(_enc,&pipe,
+       pli,pipe.sbi0[pli],pipe.sbi_end[pli]);
+      oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,pli,notstart,notdone);
+    }
+    notstart=1; /*Every stripe after the first is no longer the start.*/
+  }
+  /*Finish filling in the reference frame borders.*/
+  refi=_enc->state.ref_frame_idx[OC_FRAME_SELF];
+  for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_enc->state,refi,pli);
+  _enc->state.ntotal_coded_fragis=_enc->state.nfrags; /*All fragments coded.*/
+}
+
+
+
+/*Cost information about a MB mode.
+  The ssd, rate, and overhead fields are combined by oc_mode_set_cost(), which
+   passes them to OC_MODE_RD_COST.*/
+struct oc_mode_choice{
+  unsigned      cost; /*OC_MODE_RD_COST of ssd and rate+overhead.*/
+  unsigned      ssd; /*The accumulated distortion estimate.*/
+  unsigned      rate; /*The accumulated rate estimate for the blocks.*/
+  unsigned      overhead; /*The rate of block flags, the mode, and any MVs.*/
+  unsigned char qii[12]; /*Per-block qii; 4 is added for a skipped block.*/
+};
+
+
+
+/*Recomputes the combined R-D cost of a mode from its current SSD, rate, and
+   overhead.
+  _lambda: The Lagrange multiplier applied to the total rate.*/
+static void oc_mode_set_cost(oc_mode_choice *_modec,int _lambda){
+  unsigned total_rate;
+  /*The overhead is charged as additional rate.*/
+  total_rate=_modec->rate+_modec->overhead;
+  _modec->cost=OC_MODE_RD_COST(_modec->ssd,total_rate,_lambda);
+}
+
+/*A set of skip SSD's to use to disable early skipping.
+  The mode analysis functions only consider skipping a block whose entry is
+   less than UINT_MAX, so passing this table keeps every block coded.*/
+static const unsigned OC_NOSKIP[12]={
+  UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX,
+  UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX,
+  UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX
+};
+
+/*The estimated number of bits used by a coded chroma block to specify the AC
+   quantizer.
+  TODO: Currently this is just 0.5*log2(3) (estimating about 50% compression);
+   measurements suggest this is in the right ballpark, but it varies somewhat
+   with lambda.
+  0xCAE00D1D is approximately log2(3)*2**31; it is shifted down to
+   OC_BIT_SCALE+1 fractional bits and then halved with rounding, giving a value
+   in the same OC_BIT_SCALE units as the other rates.*/
+#define OC_CHROMA_QII_RATE ((0xCAE00D1DU>>31-OC_BIT_SCALE)+1>>1)
+/*Greedily estimates the cost of the 4 luma blocks of a MB for one mode.
+  For each block in turn this picks the cheapest qii for coding it and then,
+   when the block's skip SSD is available and fewer than 3 blocks have been
+   skipped so far, switches to skipping it if that costs no more.
+  _modec:     Receives ssd, rate, overhead, and qii[0..3]; a skipped block has
+               4 added to its qii entry.
+  _fr:        The coded block flag state before this MB (copied, not modified).
+  _qs:        The qii state before this MB (copied, not modified).
+  _frag_satd: Per-block SATD; entries 0..3 are read.
+  _skip_ssd:  Per-block SSD if skipped; UINT_MAX disables skipping a block.
+  _qti:       The quantization type passed to oc_dct_cost2().*/
+static void oc_analyze_mb_mode_luma(oc_enc_ctx *_enc,
+ oc_mode_choice *_modec,const oc_fr_state *_fr,const oc_qii_state *_qs,
+ const unsigned _frag_satd[12],const unsigned _skip_ssd[12],int _qti){
+  oc_fr_state  fr;
+  oc_qii_state qs;
+  unsigned     ssd;
+  unsigned     rate;
+  int          overhead;
+  unsigned     satd;
+  unsigned     best_ssd;
+  unsigned     best_rate;
+  int          best_overhead;
+  int          best_fri;
+  int          best_qii;
+  unsigned     cur_cost;
+  unsigned     cur_ssd;
+  unsigned     cur_rate;
+  int          cur_overhead;
+  int          lambda;
+  int          nqis;
+  int          nskipped;
+  int          bi;
+  int          qii;
+  lambda=_enc->lambda;
+  nqis=_enc->state.nqis;
+  /*We could do a trellis optimization here, but we don't make final skip
+     decisions until after transform+quantization, so the result wouldn't be
+     optimal anyway.
+    Instead we just use a greedy approach; for most SATD values, the
+     differences between the qiis are large enough to drown out the cost to
+     code the flags, anyway.*/
+  *&fr=*_fr;
+  *&qs=*_qs;
+  ssd=rate=overhead=nskipped=0;
+  for(bi=0;bi<4;bi++){
+    oc_fr_state  ft[2]; /*ft[0]: state if coded; ft[1]: state if skipped.*/
+    oc_qii_state qt[3];
+    unsigned     best_cost;
+    satd=_frag_satd[bi];
+    *(ft+0)=*&fr;
+    oc_fr_code_block(ft+0);
+    oc_qii_state_advance(qt+0,&qs,0);
+    best_overhead=(ft[0].bits-fr.bits<<OC_BIT_SCALE);
+    best_rate=oc_dct_cost2(&best_ssd,_enc->state.qis[0],0,_qti,satd)
+     +(qt[0].bits-qs.bits<<OC_BIT_SCALE);
+    best_cost=OC_MODE_RD_COST(ssd+best_ssd,rate+best_rate+best_overhead,lambda);
+    best_fri=0;
+    best_qii=0;
+    for(qii=1;qii<nqis;qii++){ /*best_overhead is the coded-flag cost here.*/
+      oc_qii_state_advance(qt+qii,&qs,qii);
+      cur_rate=oc_dct_cost2(&cur_ssd,_enc->state.qis[qii],0,_qti,satd)
+       +(qt[qii].bits-qs.bits<<OC_BIT_SCALE);
+      cur_cost=OC_MODE_RD_COST(ssd+cur_ssd,rate+cur_rate+best_overhead,lambda);
+      if(cur_cost<best_cost){
+        best_cost=cur_cost;
+        best_ssd=cur_ssd;
+        best_rate=cur_rate;
+        best_qii=qii;
+      }
+    }
+    if(_skip_ssd[bi]<UINT_MAX&&nskipped<3){ /*At most 3 blocks are skipped.*/
+      *(ft+1)=*&fr;
+      oc_fr_skip_block(ft+1);
+      cur_overhead=ft[1].bits-fr.bits<<OC_BIT_SCALE;
+      cur_ssd=_skip_ssd[bi]<<OC_BIT_SCALE;
+      cur_cost=OC_MODE_RD_COST(ssd+cur_ssd,rate+cur_overhead,lambda);
+      if(cur_cost<=best_cost){
+        best_ssd=cur_ssd;
+        best_rate=0;
+        best_overhead=cur_overhead;
+        best_fri=1;
+        best_qii+=4; /*Marks the block as skipped in _modec->qii.*/
+      }
+    }
+    rate+=best_rate;
+    ssd+=best_ssd;
+    overhead+=best_overhead;
+    *&fr=*(ft+best_fri);
+    if(best_fri==0)*&qs=*(qt+best_qii); /*Only coded blocks advance qs.*/
+    else nskipped++;
+    _modec->qii[bi]=best_qii;
+  }
+  _modec->ssd=ssd;
+  _modec->rate=rate;
+  _modec->overhead=OC_MAXI(overhead,0);
+}
+
+/*Extends a mode's rate and distortion estimates with its chroma blocks.
+  For each chroma block of the MB this picks the cheapest qii, or marks the
+   block as skipped when its skip SSD is available and skipping costs no more,
+   and adds the chosen estimates to the totals already stored in _modec.
+  _modec:     ssd and rate are read and updated; qii[4..] receive the choices
+               (a skipped block has 4 added to its qii entry).
+  _fr:        Unused.
+  _qs:        Unused.
+  _frag_satd: Per-block SATD; entries 4 and up are read.
+  _skip_ssd:  Per-block SSD if skipped; UINT_MAX disables skipping a block.
+  _qti:       The quantization type passed to oc_dct_cost2().*/
+static void oc_analyze_mb_mode_chroma(oc_enc_ctx *_enc,
+ oc_mode_choice *_modec,const oc_fr_state *_fr,const oc_qii_state *_qs,
+ const unsigned _frag_satd[12],const unsigned _skip_ssd[12],int _qti){
+  unsigned ssd;
+  unsigned rate;
+  unsigned satd;
+  unsigned best_ssd;
+  unsigned best_rate;
+  int      best_qii;
+  unsigned cur_cost;
+  unsigned cur_ssd;
+  unsigned cur_rate;
+  int      lambda;
+  int      nblocks;
+  int      nqis;
+  int      pli;
+  int      bi;
+  int      qii;
+  lambda=_enc->lambda;
+  nqis=_enc->state.nqis;
+  ssd=_modec->ssd;
+  rate=_modec->rate;
+  /*Because (except in 4:4:4 mode) we aren't considering chroma blocks in coded
+     order, we assume a constant overhead for coded block and qii flags.*/
+  nblocks=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  /*The first half of the chroma entries is costed with plane 1 and the second
+     half with plane 2; start with the end of the first half.*/
+  nblocks=(nblocks-4>>1)+4;
+  bi=4;
+  for(pli=1;pli<3;pli++){
+    for(;bi<nblocks;bi++){
+      unsigned best_cost;
+      satd=_frag_satd[bi];
+      best_rate=oc_dct_cost2(&best_ssd,_enc->state.qis[0],pli,_qti,satd)
+       +OC_CHROMA_QII_RATE;
+      best_cost=OC_MODE_RD_COST(ssd+best_ssd,rate+best_rate,lambda);
+      best_qii=0;
+      for(qii=1;qii<nqis;qii++){
+        /*Cost every candidate with this block's own plane, so that all qii
+           are compared using the same chroma model as qii 0 above.*/
+        cur_rate=oc_dct_cost2(&cur_ssd,_enc->state.qis[qii],pli,_qti,satd)
+         +OC_CHROMA_QII_RATE;
+        cur_cost=OC_MODE_RD_COST(ssd+cur_ssd,rate+cur_rate,lambda);
+        if(cur_cost<best_cost){
+          best_cost=cur_cost;
+          best_ssd=cur_ssd;
+          best_rate=cur_rate;
+          best_qii=qii;
+        }
+      }
+      if(_skip_ssd[bi]<UINT_MAX){
+        cur_ssd=_skip_ssd[bi]<<OC_BIT_SCALE;
+        cur_cost=OC_MODE_RD_COST(ssd+cur_ssd,rate,lambda);
+        if(cur_cost<=best_cost){
+          best_ssd=cur_ssd;
+          best_rate=0;
+          /*Marks the block as skipped in _modec->qii.*/
+          best_qii+=4;
+        }
+      }
+      rate+=best_rate;
+      ssd+=best_ssd;
+      _modec->qii[bi]=best_qii;
+    }
+    /*Extend the range to cover the second half of the chroma entries.*/
+    nblocks=(nblocks-4<<1)+4;
+  }
+  _modec->ssd=ssd;
+  _modec->rate=rate;
+}
+
+/*Computes the SSD of leaving a single fragment uncoded, i.e., of using the
+   block at the same offset in the OC_FRAME_PREV reference frame unchanged.
+  Pixels outside the mask of a border fragment are ignored.
+  _fragi:      The index of the fragment.
+  _ystride:    The stride of the plane containing the fragment.
+  _dc_dequant: The DC dequantizer used for the force-code test.
+  Return: The AC contribution to the SSD, scaled to match the DCT domain, or
+           UINT_MAX if the DC difference is large enough that the block should
+           always be coded.*/
+static unsigned oc_skip_frag_ssd(oc_enc_ctx *_enc,ptrdiff_t _fragi,
+ int _ystride,unsigned _dc_dequant){
+  OC_ALIGN16(ogg_int16_t  buffer[64]);
+  const unsigned char    *src;
+  const unsigned char    *ref;
+  ptrdiff_t               frag_offs;
+  unsigned                uncoded_ssd;
+  int                     uncoded_dc;
+  int                     dc_flag;
+  int                     borderi;
+  int                     pi;
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]];
+  frag_offs=_enc->state.frag_buf_offs[_fragi];
+  oc_enc_frag_sub(_enc,buffer,src+frag_offs,ref+frag_offs,_ystride);
+  borderi=_enc->state.frags[_fragi].borderi;
+  uncoded_ssd=uncoded_dc=0;
+  if(borderi<0){
+    for(pi=0;pi<64;pi++){
+      uncoded_ssd+=buffer[pi]*buffer[pi];
+      uncoded_dc+=buffer[pi];
+    }
+  }
+  else{
+    ogg_int64_t mask;
+    /*Only accumulate the pixels selected by the border mask.*/
+    mask=_enc->state.borders[borderi].mask;
+    for(pi=0;pi<64;pi++,mask>>=1)if(mask&1){
+      uncoded_ssd+=buffer[pi]*buffer[pi];
+      uncoded_dc+=buffer[pi];
+    }
+  }
+  /*Scale to match DCT domain.*/
+  uncoded_ssd<<=4;
+  /*We actually only want the AC contribution to the SSD.*/
+  uncoded_ssd-=uncoded_dc*uncoded_dc>>2;
+  /*DC is a special case; if there's more than a full-quantizer improvement
+     in the effective DC component, always force-code the block.*/
+  dc_flag=abs(uncoded_dc)>_dc_dequant<<1;
+  /*-dc_flag is all ones when the flag is set, which forces UINT_MAX.*/
+  uncoded_ssd|=-dc_flag;
+  return uncoded_ssd;
+}
+
+/*Computes the skip SSD of every block in a MB.
+  Each value is stored both in _ssd (entries 0..3 for luma, 4 and up for
+   chroma) and in the pipeline's per-plane skip_ssd table, indexed by the
+   fragment index relative to _pipe->froffset for that plane.
+  _mbi: The index of the MB.
+  _ssd: Receives the skip SSD of each block; UINT_MAX marks a block that
+         should always be coded.*/
+static void oc_skip_cost(oc_enc_ctx *_enc,oc_enc_pipeline_state *_pipe,
+ unsigned _mbi,unsigned _ssd[12]){
+  const ptrdiff_t        *sb_map;
+  const oc_mb_map_plane  *mb_map;
+  const unsigned char    *map_idxs;
+  int                     map_nidxs;
+  unsigned                dc_dequant;
+  int                     ystride;
+  int                     mapii;
+  int                     mapi;
+  int                     pli;
+  int                     bi;
+  ptrdiff_t               fragi;
+  ystride=_enc->state.ref_ystride[0];
+  sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3];
+  dc_dequant=_enc->state.dequant_tables[_enc->state.qis[0]][0][1][0];
+  for(bi=0;bi<4;bi++){
+    fragi=sb_map[bi];
+    _pipe->skip_ssd[0][fragi-_pipe->froffset[0]]=_ssd[bi]=
+     oc_skip_frag_ssd(_enc,fragi,ystride,dc_dequant);
+  }
+  mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
+  map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+  /*The first half of the chroma entries belongs to plane 1 and the second
+     half to plane 2; start with the end of the first half.*/
+  map_nidxs=(map_nidxs-4>>1)+4;
+  mapii=4;
+  for(pli=1;pli<3;pli++){
+    ystride=_enc->state.ref_ystride[pli];
+    dc_dequant=_enc->state.dequant_tables[_enc->state.qis[0]][pli][1][0];
+    for(;mapii<map_nidxs;mapii++){
+      mapi=map_idxs[mapii];
+      bi=mapi&3;
+      fragi=mb_map[pli][bi];
+      _pipe->skip_ssd[pli][fragi-_pipe->froffset[pli]]=_ssd[mapii]=
+       oc_skip_frag_ssd(_enc,fragi,ystride,dc_dequant);
+    }
+    /*Extend the range to cover the second half of the chroma entries.*/
+    map_nidxs=(map_nidxs-4<<1)+4;
+  }
+}
+
+/*Computes the intra SATD of every block in a MB.
+  _mbi:       The index of the MB.
+  _frag_satd: Receives the SATD of each block: entries 0..3 for the luma
+               blocks, entries 4 and up for the chroma blocks in
+               OC_MB_MAP_IDXS order.*/
+static void oc_mb_intra_satd(oc_enc_ctx *_enc,unsigned _mbi,
+ unsigned _frag_satd[12]){
+  const unsigned char   *img;
+  const unsigned char   *blk;
+  const ptrdiff_t       *offs;
+  const ptrdiff_t       *luma_frags;
+  const oc_mb_map_plane *planes;
+  const unsigned char   *idxs;
+  int                    nidxs;
+  int                    stride;
+  int                    mapi;
+  int                    i;
+  img=_enc->state.ref_frame_data[OC_FRAME_IO];
+  offs=_enc->state.frag_buf_offs;
+  /*Luma blocks are addressed through the super block map.*/
+  luma_frags=_enc->state.sb_maps[_mbi>>2][_mbi&3];
+  stride=_enc->state.ref_ystride[0];
+  for(i=0;i<4;i++){
+    blk=img+offs[luma_frags[i]];
+    _frag_satd[i]=oc_enc_frag_intra_satd(_enc,blk,stride);
+  }
+  /*Chroma blocks are addressed through the macro block map.*/
+  planes=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
+  idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+  nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/
+  stride=_enc->state.ref_ystride[1];
+  for(i=4;i<nidxs;i++){
+    mapi=idxs[i];
+    /*mapi>>2 is the plane and mapi&3 is the block within the MB.*/
+    blk=img+offs[planes[mapi>>2][mapi&3]];
+    _frag_satd[i]=oc_enc_frag_intra_satd(_enc,blk,stride);
+  }
+}
+/*Estimates the cost of coding a MB in OC_MODE_INTRA.
+  Runs the luma and chroma analyses with quantization type 0, adds the cost of
+   signaling the mode (scaled by OC_BIT_SCALE) to the overhead, and sets the
+   combined cost using the encoder's lambda.
+  _mbi is not used.*/
+static void oc_cost_intra(oc_enc_ctx *_enc,oc_mode_choice *_modec,
+ unsigned _mbi,const oc_fr_state *_fr,const oc_qii_state *_qs,
+ const unsigned _frag_satd[12],const unsigned _skip_ssd[12]){
+  oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,_frag_satd,_skip_ssd,0);
+  oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,_frag_satd,_skip_ssd,0);
+  _modec->overhead+=
+   oc_mode_scheme_chooser_cost(&_enc->chooser,OC_MODE_INTRA)<<OC_BIT_SCALE;
+  oc_mode_set_cost(_modec,_enc->lambda);
+}
+
+/*Estimates the cost of coding a MB in an inter mode that uses a single MV.
+  The SATD of every block is measured against the reference frame selected by
+   OC_FRAME_FOR_MODE(_mb_mode), displaced by _mv; when
+   oc_state_get_mv_offsets() returns more than one offset for a plane, the two
+   reference variant of the SATD function is used for that plane.
+  The luma and chroma analyses are then run with quantization type 1, the cost
+   of signaling the mode is added to the overhead, and the combined cost is
+   set.
+  _modec:    Receives the estimates.
+  _mbi:      The index of the MB.
+  _mb_mode:  The inter mode being costed.
+  _mv:       The (x,y) motion vector.
+  _fr:       The coded block flag state before this MB.
+  _qs:       The qii state before this MB.
+  _skip_ssd: Per-block SSD if skipped.*/
+static void oc_cost_inter(oc_enc_ctx *_enc,oc_mode_choice *_modec,
+ unsigned _mbi,int _mb_mode,const signed char *_mv,
+ const oc_fr_state *_fr,const oc_qii_state *_qs,const unsigned _skip_ssd[12]){
+  unsigned               blk_satd[12];
+  const unsigned char   *cur;
+  const unsigned char   *pred;
+  const ptrdiff_t       *offs;
+  const ptrdiff_t       *luma_frags;
+  const oc_mb_map_plane *planes;
+  const unsigned char   *idxs;
+  ptrdiff_t              off;
+  int                    nidxs;
+  int                    mv_offs[2];
+  int                    two_refs;
+  int                    stride;
+  int                    mvx;
+  int                    mvy;
+  int                    mapi;
+  int                    i;
+  cur=_enc->state.ref_frame_data[OC_FRAME_IO];
+  pred=_enc->state.ref_frame_data[
+   _enc->state.ref_frame_idx[OC_FRAME_FOR_MODE(_mb_mode)]];
+  stride=_enc->state.ref_ystride[0];
+  offs=_enc->state.frag_buf_offs;
+  luma_frags=_enc->state.sb_maps[_mbi>>2][_mbi&3];
+  mvx=_mv[0];
+  mvy=_mv[1];
+  _modec->rate=_modec->ssd=0;
+  /*Luma: all 4 blocks share the same reference offsets.*/
+  two_refs=oc_state_get_mv_offsets(&_enc->state,mv_offs,0,mvx,mvy)>1;
+  for(i=0;i<4;i++){
+    off=offs[luma_frags[i]];
+    if(two_refs){
+      blk_satd[i]=oc_enc_frag_satd2_thresh(_enc,cur+off,
+       pred+off+mv_offs[0],pred+off+mv_offs[1],stride,UINT_MAX);
+    }
+    else{
+      blk_satd[i]=oc_enc_frag_satd_thresh(_enc,cur+off,
+       pred+off+mv_offs[0],stride,UINT_MAX);
+    }
+  }
+  planes=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
+  idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+  nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/
+  stride=_enc->state.ref_ystride[1];
+  /*Chroma: the offsets computed for plane 1 are used for both planes.*/
+  two_refs=oc_state_get_mv_offsets(&_enc->state,mv_offs,1,mvx,mvy)>1;
+  for(i=4;i<nidxs;i++){
+    mapi=idxs[i];
+    /*mapi>>2 is the plane and mapi&3 is the block within the MB.*/
+    off=offs[planes[mapi>>2][mapi&3]];
+    if(two_refs){
+      blk_satd[i]=oc_enc_frag_satd2_thresh(_enc,cur+off,
+       pred+off+mv_offs[0],pred+off+mv_offs[1],stride,UINT_MAX);
+    }
+    else{
+      blk_satd[i]=oc_enc_frag_satd_thresh(_enc,cur+off,
+       pred+off+mv_offs[0],stride,UINT_MAX);
+    }
+  }
+  oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,blk_satd,_skip_ssd,1);
+  oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,blk_satd,_skip_ssd,1);
+  _modec->overhead+=
+   oc_mode_scheme_chooser_cost(&_enc->chooser,_mb_mode)<<OC_BIT_SCALE;
+  oc_mode_set_cost(_modec,_enc->lambda);
+}
+/*Estimates the cost of coding a MB in an inter mode with a (0,0) MV by
+   forwarding to oc_cost_inter() with a zero-initialized MV.*/
+static void oc_cost_inter_nomv(oc_enc_ctx *_enc,oc_mode_choice *_modec,
+ unsigned _mbi,int _mb_mode,const oc_fr_state *_fr,const oc_qii_state *_qs,
+ const unsigned _skip_ssd[12]){
+  static const oc_mv OC_MV_ZERO; /*Static storage, so both components are 0.*/
+  oc_cost_inter(_enc,_modec,_mbi,_mb_mode,OC_MV_ZERO,_fr,_qs,_skip_ssd);
+}
+/*Estimates the cost of coding a MB in an inter mode with one explicit MV.
+  The MV rate is charged as the increase in the smaller of the two running MV
+   bit totals in _enc->mv_bits: index 0 grows by the OC_MV_BITS[0] lengths of
+   both components, index 1 by a flat 12 bits.
+  The cost is set once inside oc_cost_inter() and again here after the MV
+   overhead has been added.
+  Return: The OC_MV_BITS[0] bit count for this MV.*/
+static int oc_cost_inter1mv(oc_enc_ctx *_enc,oc_mode_choice *_modec,
+ unsigned _mbi,int _mb_mode,const signed char *_mv,
+ const oc_fr_state *_fr,const oc_qii_state *_qs,const unsigned _skip_ssd[12]){
+  int bits0;
+  oc_cost_inter(_enc,_modec,_mbi,_mb_mode,_mv,_fr,_qs,_skip_ssd);
+  bits0=OC_MV_BITS[0][_mv[0]+31]+OC_MV_BITS[0][_mv[1]+31];
+  _modec->overhead+=OC_MINI(_enc->mv_bits[0]+bits0,_enc->mv_bits[1]+12)
+   -OC_MINI(_enc->mv_bits[0],_enc->mv_bits[1])<<OC_BIT_SCALE; /*(a-b)<<scale.*/
+  oc_mode_set_cost(_modec,_enc->lambda);
+  return bits0;
+}
+
+/*A mapping from oc_mb_map (raster) ordering to oc_sb_map (Hilbert) ordering.
+  The first index is the low two bits of the MB index (_mbi&3) and the second
+   is the block index in oc_mb_map order.*/
+static const unsigned char OC_MB_PHASE[4][4]={
+  {0,1,3,2},{0,3,1,2},{0,3,1,2},{2,3,1,0}
+};
+/*Estimates the cost of coding a MB in OC_MODE_INTER_MV_FOUR.
+  The luma SATDs are measured with one MV per block against the OC_FRAME_PREV
+   reference and analyzed first; blocks the luma analysis marks as skipped are
+   then given (0,0) MVs before the chroma MVs are derived, and contribute no
+   MV bits.
+  As a side effect, the 4 luma MVs are written to _enc->state.frag_mvs.
+  _modec:    Receives the estimates.
+  _mbi:      The index of the MB.
+  _mv:       The 4 luma block MVs, in oc_mb_map order.
+  _fr:       The coded block flag state before this MB.
+  _qs:       The qii state before this MB.
+  _skip_ssd: Per-block SSD if skipped; ignored for luma when
+              _enc->vp3_compatible is set.*/
+static void oc_cost_inter4mv(oc_enc_ctx *_enc,oc_mode_choice *_modec,
+ unsigned _mbi,oc_mv _mv[4],const oc_fr_state *_fr,const oc_qii_state *_qs,
+ const unsigned _skip_ssd[12]){
+  unsigned               frag_satd[12];
+  oc_mv                  lbmvs[4];
+  oc_mv                  cbmvs[4];
+  const unsigned char   *src;
+  const unsigned char   *ref;
+  int                    ystride;
+  const ptrdiff_t       *frag_buf_offs;
+  oc_mv                 *frag_mvs;
+  const oc_mb_map_plane *mb_map;
+  const unsigned char   *map_idxs;
+  int                    map_nidxs;
+  int                    nqis;
+  int                    mapii;
+  int                    mapi;
+  int                    mv_offs[2];
+  int                    dx;
+  int                    dy;
+  int                    pli;
+  int                    bi;
+  ptrdiff_t              fragi;
+  ptrdiff_t              frag_offs;
+  int                    bits0;
+  int                    bits1;
+  unsigned               satd;
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]];
+  ystride=_enc->state.ref_ystride[0];
+  frag_buf_offs=_enc->state.frag_buf_offs;
+  frag_mvs=_enc->state.frag_mvs;
+  mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
+  _modec->rate=_modec->ssd=0;
+  for(bi=0;bi<4;bi++){
+    fragi=mb_map[0][bi];
+    dx=_mv[bi][0];
+    dy=_mv[bi][1];
+    /*Save the block MVs as the current ones while we're here; we'll replace
+       them if we don't ultimately choose 4MV mode.*/
+    frag_mvs[fragi][0]=(signed char)dx;
+    frag_mvs[fragi][1]=(signed char)dy;
+    frag_offs=frag_buf_offs[fragi];
+    if(oc_state_get_mv_offsets(&_enc->state,mv_offs,0,dx,dy)>1){
+      satd=oc_enc_frag_satd2_thresh(_enc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX);
+    }
+    else{
+      satd=oc_enc_frag_satd_thresh(_enc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ystride,UINT_MAX);
+    }
+    frag_satd[OC_MB_PHASE[_mbi&3][bi]]=satd; /*Store in oc_sb_map order.*/
+  }
+  oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,frag_satd,
+   _enc->vp3_compatible?OC_NOSKIP:_skip_ssd,1);
+  /*Figure out which blocks are being skipped and give them (0,0) MVs.
+    A qii entry of nqis or more is treated as marking a skipped block.*/
+  bits0=0;
+  bits1=0;
+  nqis=_enc->state.nqis;
+  for(bi=0;bi<4;bi++){
+    if(_modec->qii[OC_MB_PHASE[_mbi&3][bi]]>=nqis){
+      memset(lbmvs+bi,0,sizeof(*lbmvs));
+    }
+    else{
+      memcpy(lbmvs+bi,_mv+bi,sizeof(*lbmvs));
+      bits0+=OC_MV_BITS[0][_mv[bi][0]+31]+OC_MV_BITS[0][_mv[bi][1]+31];
+      bits1+=12;
+    }
+  }
+  (*OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt])(cbmvs,
+   (const oc_mv *)lbmvs);
+  map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+  map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/
+  ystride=_enc->state.ref_ystride[1];
+  for(mapii=4;mapii<map_nidxs;mapii++){
+    mapi=map_idxs[mapii];
+    pli=mapi>>2;
+    bi=mapi&3;
+    fragi=mb_map[pli][bi];
+    dx=cbmvs[bi][0];
+    dy=cbmvs[bi][1];
+    frag_offs=frag_buf_offs[fragi];
+    /*TODO: We could save half these calls by re-using the results for the Cb
+       and Cr planes; is it worth it?*/
+    if(oc_state_get_mv_offsets(&_enc->state,mv_offs,pli,dx,dy)>1){
+      satd=oc_enc_frag_satd2_thresh(_enc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX);
+    }
+    else{
+      satd=oc_enc_frag_satd_thresh(_enc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ystride,UINT_MAX);
+    }
+    frag_satd[mapii]=satd;
+  }
+  oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,frag_satd,_skip_ssd,1);
+  _modec->overhead+=
+   oc_mode_scheme_chooser_cost(&_enc->chooser,OC_MODE_INTER_MV_FOUR)
+   +OC_MINI(_enc->mv_bits[0]+bits0,_enc->mv_bits[1]+bits1)
+   -OC_MINI(_enc->mv_bits[0],_enc->mv_bits[1])<<OC_BIT_SCALE; /*Sum, then shift.*/
+  oc_mode_set_cost(_modec,_enc->lambda);
+}
+
+/*Analyzes the current frame as an inter (delta) frame.
+  For every valid macro block this estimates the R-D cost of the candidate
+   coding modes, picks the cheapest one, propagates the chosen mode, MVs and
+   quantizer indices to the fragments, and hands the luma blocks to
+   oc_enc_mb_transform_quantize_luma(); chroma is processed per stripe with
+   oc_enc_sb_transform_quantize_chroma().
+  _enc:            The encoder context.
+  _allow_keyframe: If non-zero, the cost of coding each macro block in a
+                    keyframe is accumulated as well, and compared against the
+                    accumulated inter cost once all stripes are done.
+  _recode:         If non-zero, the motion search and the reset of the
+                    per-macro-block unrefined MVs/refinement flags are
+                    skipped, so existing values are re-used.
+  Return: 1 if _allow_keyframe was set and the estimated inter bit count
+           exceeded the estimated intra bit count (in which case the coded
+           macro block count and coded fragment list are not finalized),
+           or 0 otherwise.*/
+int oc_enc_analyze_inter(oc_enc_ctx *_enc,int _allow_keyframe,int _recode){
+  oc_set_chroma_mvs_func  set_chroma_mvs;
+  oc_enc_pipeline_state   pipe;
+  oc_qii_state            intra_luma_qs;
+  oc_mv                   last_mv;
+  oc_mv                   prior_mv;
+  ogg_int64_t             interbits;
+  ogg_int64_t             intrabits;
+  const unsigned char    *map_idxs;
+  int                     nmap_idxs;
+  unsigned               *coded_mbis;
+  unsigned               *uncoded_mbis;
+  size_t                  ncoded_mbis;
+  size_t                  nuncoded_mbis;
+  oc_sb_flags            *sb_flags;
+  signed char            *mb_modes;
+  const oc_sb_map        *sb_maps;
+  const oc_mb_map        *mb_maps;
+  oc_mb_enc_info         *embs;
+  oc_fragment            *frags;
+  oc_mv                  *frag_mvs;
+  int                     qi;
+  unsigned                stripe_sby;
+  unsigned                mcu_nvsbs;
+  int                     notstart;
+  int                     notdone;
+  int                     vdec;
+  unsigned                sbi;
+  unsigned                sbi_end;
+  int                     refi;
+  int                     pli;
+  set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt];
+  _enc->state.frame_type=OC_INTER_FRAME;
+  oc_mode_scheme_chooser_reset(&_enc->chooser);
+  oc_enc_tokenize_start(_enc);
+  oc_enc_pipeline_init(_enc,&pipe);
+  if(_allow_keyframe)oc_qii_state_init(&intra_luma_qs);
+  _enc->mv_bits[0]=_enc->mv_bits[1]=0;
+  interbits=intrabits=0;
+  last_mv[0]=last_mv[1]=prior_mv[0]=prior_mv[1]=0;
+  /*Choose MVs and MB modes and quantize and code luma.
+    Must be done in Hilbert order.*/
+  map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+  nmap_idxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  qi=_enc->state.qis[0];
+  /*Coded MB indices are appended from the front of the list, uncoded ones
+     are written backwards from its end.*/
+  coded_mbis=_enc->coded_mbis;
+  uncoded_mbis=coded_mbis+_enc->state.nmbs;
+  ncoded_mbis=0;
+  nuncoded_mbis=0;
+  _enc->state.ncoded_fragis[0]=0;
+  _enc->state.ncoded_fragis[1]=0;
+  _enc->state.ncoded_fragis[2]=0;
+  sb_flags=_enc->state.sb_flags;
+  mb_modes=_enc->state.mb_modes;
+  sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
+  mb_maps=(const oc_mb_map *)_enc->state.mb_maps;
+  embs=_enc->mb_info;
+  frags=_enc->state.frags;
+  frag_mvs=_enc->state.frag_mvs;
+  vdec=!(_enc->state.info.pixel_fmt&2);
+  notstart=0;
+  notdone=1;
+  mcu_nvsbs=_enc->mcu_nvsbs;
+  for(stripe_sby=0;notdone;stripe_sby+=mcu_nvsbs){
+    notdone=oc_enc_pipeline_set_stripe(_enc,&pipe,stripe_sby);
+    sbi_end=pipe.sbi_end[0];
+    for(sbi=pipe.sbi0[0];sbi<sbi_end;sbi++){
+      int quadi;
+      /*Mode addressing is through Y plane, always 4 MB per SB.*/
+      for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
+        oc_mode_choice modes[8];
+        unsigned       skip_ssd[12];
+        unsigned       intra_satd[12];
+        int            mb_mv_bits_0;
+        int            mb_gmv_bits_0;
+        int            inter_mv_pref;
+        int            mb_mode;
+        int            dx;
+        int            dy;
+        unsigned       mbi;
+        int            mapii;
+        int            mapi;
+        int            bi;
+        ptrdiff_t      fragi;
+        mbi=sbi<<2|quadi;
+        /*Motion estimation:
+          We always do a basic 1MV search for all macroblocks, coded or not,
+           keyframe or not.*/
+        if(!_recode&&_enc->sp_level<OC_SP_LEVEL_NOMC)oc_mcenc_search(_enc,mbi);
+        dx=dy=0;
+        /*Find the block choice with the lowest estimated coding cost.
+          If a Cb or Cr block is coded but no Y' block from a macro block then
+           the mode MUST be OC_MODE_INTER_NOMV.
+          This is the default state to which the mode data structure is
+           initialised in encoder and decoder at the start of each frame.*/
+        /*Block coding cost is estimated from correlated SATD metrics.*/
+        /*At this point, all blocks that are in frame are still marked coded.*/
+        if(!_recode){
+          memcpy(embs[mbi].unref_mv,
+           embs[mbi].analysis_mv[0],sizeof(embs[mbi].unref_mv));
+          embs[mbi].refined=0;
+        }
+        oc_mb_intra_satd(_enc,mbi,intra_satd);
+        /*Estimate the cost of coding this MB in a keyframe.*/
+        if(_allow_keyframe){
+          oc_cost_intra(_enc,modes+OC_MODE_INTRA,mbi,
+           pipe.fr+0,&intra_luma_qs,intra_satd,OC_NOSKIP);
+          intrabits+=modes[OC_MODE_INTRA].rate;
+          for(bi=0;bi<4;bi++){
+            oc_qii_state_advance(&intra_luma_qs,&intra_luma_qs,
+             modes[OC_MODE_INTRA].qii[bi]);
+          }
+        }
+        /*Estimate the cost in a delta frame for various modes.*/
+        oc_skip_cost(_enc,&pipe,mbi,skip_ssd);
+        oc_cost_inter_nomv(_enc,modes+OC_MODE_INTER_NOMV,mbi,
+         OC_MODE_INTER_NOMV,pipe.fr+0,pipe.qs+0,skip_ssd);
+        if(_enc->sp_level<OC_SP_LEVEL_NOMC){
+          oc_cost_intra(_enc,modes+OC_MODE_INTRA,mbi,
+           pipe.fr+0,pipe.qs+0,intra_satd,skip_ssd);
+          mb_mv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_INTER_MV,mbi,
+           OC_MODE_INTER_MV,embs[mbi].unref_mv[OC_FRAME_PREV],
+           pipe.fr+0,pipe.qs+0,skip_ssd);
+          oc_cost_inter(_enc,modes+OC_MODE_INTER_MV_LAST,mbi,
+           OC_MODE_INTER_MV_LAST,last_mv,pipe.fr+0,pipe.qs+0,skip_ssd);
+          oc_cost_inter(_enc,modes+OC_MODE_INTER_MV_LAST2,mbi,
+           OC_MODE_INTER_MV_LAST2,prior_mv,pipe.fr+0,pipe.qs+0,skip_ssd);
+          oc_cost_inter4mv(_enc,modes+OC_MODE_INTER_MV_FOUR,mbi,
+           embs[mbi].block_mv,pipe.fr+0,pipe.qs+0,skip_ssd);
+          oc_cost_inter_nomv(_enc,modes+OC_MODE_GOLDEN_NOMV,mbi,
+           OC_MODE_GOLDEN_NOMV,pipe.fr+0,pipe.qs+0,skip_ssd);
+          mb_gmv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_GOLDEN_MV,mbi,
+           OC_MODE_GOLDEN_MV,embs[mbi].unref_mv[OC_FRAME_GOLD],
+           pipe.fr+0,pipe.qs+0,skip_ssd);
+          /*The explicit MV modes (2,6,7) have not yet gone through halfpel
+             refinement.
+            We choose the explicit MV mode that's already furthest ahead on
+             R-D cost and refine only that one.
+            We have to be careful to remember which ones we've refined so that
+             we don't refine it again if we re-encode this frame.*/
+          inter_mv_pref=_enc->lambda*3;
+          if(modes[OC_MODE_INTER_MV_FOUR].cost<modes[OC_MODE_INTER_MV].cost&&
+           modes[OC_MODE_INTER_MV_FOUR].cost<modes[OC_MODE_GOLDEN_MV].cost){
+            if(!(embs[mbi].refined&0x80)){
+              oc_mcenc_refine4mv(_enc,mbi);
+              embs[mbi].refined|=0x80;
+            }
+            oc_cost_inter4mv(_enc,modes+OC_MODE_INTER_MV_FOUR,mbi,
+             embs[mbi].ref_mv,pipe.fr+0,pipe.qs+0,skip_ssd);
+          }
+          else if(modes[OC_MODE_GOLDEN_MV].cost+inter_mv_pref<
+           modes[OC_MODE_INTER_MV].cost){
+            if(!(embs[mbi].refined&0x40)){
+              oc_mcenc_refine1mv(_enc,mbi,OC_FRAME_GOLD);
+              embs[mbi].refined|=0x40;
+            }
+            mb_gmv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_GOLDEN_MV,mbi,
+             OC_MODE_GOLDEN_MV,embs[mbi].analysis_mv[0][OC_FRAME_GOLD],
+             pipe.fr+0,pipe.qs+0,skip_ssd);
+          }
+          /*The previous-frame 1MV candidate is always refined (once).*/
+          if(!(embs[mbi].refined&0x04)){
+            oc_mcenc_refine1mv(_enc,mbi,OC_FRAME_PREV);
+            embs[mbi].refined|=0x04;
+          }
+          mb_mv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_INTER_MV,mbi,
+           OC_MODE_INTER_MV,embs[mbi].analysis_mv[0][OC_FRAME_PREV],
+           pipe.fr+0,pipe.qs+0,skip_ssd);
+          /*Finally, pick the mode with the cheapest estimated R-D cost.*/
+          mb_mode=OC_MODE_INTER_NOMV;
+          if(modes[OC_MODE_INTRA].cost<modes[OC_MODE_INTER_NOMV].cost){
+            mb_mode=OC_MODE_INTRA;
+          }
+          if(modes[OC_MODE_INTER_MV_LAST].cost<modes[mb_mode].cost){
+            mb_mode=OC_MODE_INTER_MV_LAST;
+          }
+          if(modes[OC_MODE_INTER_MV_LAST2].cost<modes[mb_mode].cost){
+            mb_mode=OC_MODE_INTER_MV_LAST2;
+          }
+          if(modes[OC_MODE_GOLDEN_NOMV].cost<modes[mb_mode].cost){
+            mb_mode=OC_MODE_GOLDEN_NOMV;
+          }
+          if(modes[OC_MODE_GOLDEN_MV].cost<modes[mb_mode].cost){
+            mb_mode=OC_MODE_GOLDEN_MV;
+          }
+          if(modes[OC_MODE_INTER_MV_FOUR].cost<modes[mb_mode].cost){
+            mb_mode=OC_MODE_INTER_MV_FOUR;
+          }
+          /*We prefer OC_MODE_INTER_MV, but not over LAST and LAST2.*/
+          if(mb_mode==OC_MODE_INTER_MV_LAST||mb_mode==OC_MODE_INTER_MV_LAST2){
+            inter_mv_pref=0;
+          }
+          if(modes[OC_MODE_INTER_MV].cost<modes[mb_mode].cost+inter_mv_pref){
+            mb_mode=OC_MODE_INTER_MV;
+          }
+        }
+        else{
+          /*The intra cost has to be estimated in this branch, too.
+            Otherwise modes[OC_MODE_INTRA] is either uninitialized (when
+             _allow_keyframe is not set) or still holds the keyframe estimate
+             made above with intra_luma_qs and OC_NOSKIP, neither of which is
+             valid for the comparison and qii look-ups below.*/
+          oc_cost_intra(_enc,modes+OC_MODE_INTRA,mbi,
+           pipe.fr+0,pipe.qs+0,intra_satd,skip_ssd);
+          oc_cost_inter_nomv(_enc,modes+OC_MODE_GOLDEN_NOMV,mbi,
+           OC_MODE_GOLDEN_NOMV,pipe.fr+0,pipe.qs+0,skip_ssd);
+          mb_mode=OC_MODE_INTER_NOMV;
+          if(modes[OC_MODE_INTRA].cost<modes[OC_MODE_INTER_NOMV].cost){
+            mb_mode=OC_MODE_INTRA;
+          }
+          if(modes[OC_MODE_GOLDEN_NOMV].cost<modes[mb_mode].cost){
+            mb_mode=OC_MODE_GOLDEN_NOMV;
+          }
+          mb_mv_bits_0=mb_gmv_bits_0=0;
+        }
+        mb_modes[mbi]=mb_mode;
+        /*Propagate the MVs to the luma blocks.*/
+        if(mb_mode!=OC_MODE_INTER_MV_FOUR){
+          /*Modes not listed here keep the (0,0) vector set above.*/
+          switch(mb_mode){
+            case OC_MODE_INTER_MV:{
+              dx=embs[mbi].analysis_mv[0][OC_FRAME_PREV][0];
+              dy=embs[mbi].analysis_mv[0][OC_FRAME_PREV][1];
+            }break;
+            case OC_MODE_INTER_MV_LAST:{
+              dx=last_mv[0];
+              dy=last_mv[1];
+            }break;
+            case OC_MODE_INTER_MV_LAST2:{
+              dx=prior_mv[0];
+              dy=prior_mv[1];
+            }break;
+            case OC_MODE_GOLDEN_MV:{
+              dx=embs[mbi].analysis_mv[0][OC_FRAME_GOLD][0];
+              dy=embs[mbi].analysis_mv[0][OC_FRAME_GOLD][1];
+            }break;
+          }
+          for(bi=0;bi<4;bi++){
+            fragi=mb_maps[mbi][0][bi];
+            frag_mvs[fragi][0]=(signed char)dx;
+            frag_mvs[fragi][1]=(signed char)dy;
+          }
+        }
+        for(bi=0;bi<4;bi++){
+          fragi=sb_maps[mbi>>2][mbi&3][bi];
+          frags[fragi].qii=modes[mb_mode].qii[bi];
+        }
+        if(oc_enc_mb_transform_quantize_luma(_enc,&pipe,mbi,
+         modes[mb_mode].overhead>>OC_BIT_SCALE)>0){
+          int orig_mb_mode;
+          orig_mb_mode=mb_mode;
+          /*Re-read the mode: the call above may have changed it (see the
+             handling of backing out from 4MV below).*/
+          mb_mode=mb_modes[mbi];
+          switch(mb_mode){
+            case OC_MODE_INTER_MV:{
+              memcpy(prior_mv,last_mv,sizeof(prior_mv));
+              /*If we're backing out from 4MV, find the MV we're actually
+                 using.*/
+              if(orig_mb_mode==OC_MODE_INTER_MV_FOUR){
+                for(bi=0;;bi++){
+                  fragi=mb_maps[mbi][0][bi];
+                  if(frags[fragi].coded){
+                    memcpy(last_mv,frag_mvs[fragi],sizeof(last_mv));
+                    dx=frag_mvs[fragi][0];
+                    dy=frag_mvs[fragi][1];
+                    break;
+                  }
+                }
+                mb_mv_bits_0=OC_MV_BITS[0][dx+31]+OC_MV_BITS[0][dy+31];
+              }
+              /*Otherwise we used the original analysis MV.*/
+              else{
+                memcpy(last_mv,
+                 embs[mbi].analysis_mv[0][OC_FRAME_PREV],sizeof(last_mv));
+              }
+              _enc->mv_bits[0]+=mb_mv_bits_0;
+              _enc->mv_bits[1]+=12;
+            }break;
+            case OC_MODE_INTER_MV_LAST2:{
+              /*Swap last_mv and prior_mv.*/
+              oc_mv tmp_mv;
+              memcpy(tmp_mv,prior_mv,sizeof(tmp_mv));
+              memcpy(prior_mv,last_mv,sizeof(prior_mv));
+              memcpy(last_mv,tmp_mv,sizeof(last_mv));
+            }break;
+            case OC_MODE_GOLDEN_MV:{
+              _enc->mv_bits[0]+=mb_gmv_bits_0;
+              _enc->mv_bits[1]+=12;
+            }break;
+            case OC_MODE_INTER_MV_FOUR:{
+              oc_mv lbmvs[4];
+              oc_mv cbmvs[4];
+              memcpy(prior_mv,last_mv,sizeof(prior_mv));
+              for(bi=0;bi<4;bi++){
+                fragi=mb_maps[mbi][0][bi];
+                if(frags[fragi].coded){
+                  memcpy(last_mv,frag_mvs[fragi],sizeof(last_mv));
+                  memcpy(lbmvs[bi],frag_mvs[fragi],sizeof(lbmvs[bi]));
+                  _enc->mv_bits[0]+=OC_MV_BITS[0][frag_mvs[fragi][0]+31]
+                   +OC_MV_BITS[0][frag_mvs[fragi][1]+31];
+                  _enc->mv_bits[1]+=12;
+                }
+                /*Replace the block MVs for not-coded blocks with (0,0).*/
+                else memset(lbmvs[bi],0,sizeof(lbmvs[bi]));
+              }
+              (*set_chroma_mvs)(cbmvs,(const oc_mv *)lbmvs);
+              for(mapii=4;mapii<nmap_idxs;mapii++){
+                mapi=map_idxs[mapii];
+                pli=mapi>>2;
+                bi=mapi&3;
+                fragi=mb_maps[mbi][pli][bi];
+                frags[fragi].mb_mode=mb_mode;
+                frags[fragi].qii=modes[OC_MODE_INTER_MV_FOUR].qii[mapii];
+                memcpy(frag_mvs[fragi],cbmvs[bi],sizeof(frag_mvs[fragi]));
+              }
+            }break;
+          }
+          coded_mbis[ncoded_mbis++]=mbi;
+          oc_mode_scheme_chooser_update(&_enc->chooser,mb_mode);
+          interbits+=modes[mb_mode].rate+modes[mb_mode].overhead;
+        }
+        else{
+          *(uncoded_mbis-++nuncoded_mbis)=mbi;
+          mb_mode=OC_MODE_INTER_NOMV;
+          dx=dy=0;
+        }
+        /*Propagate final MB mode and MVs to the chroma blocks.
+          This has already been done for 4MV mode, since it requires individual
+           block motion vectors.*/
+        if(mb_mode!=OC_MODE_INTER_MV_FOUR){
+          for(mapii=4;mapii<nmap_idxs;mapii++){
+            mapi=map_idxs[mapii];
+            pli=mapi>>2;
+            bi=mapi&3;
+            fragi=mb_maps[mbi][pli][bi];
+            frags[fragi].mb_mode=mb_mode;
+            /*If we switched from 4MV mode to INTER_MV mode, then the qii
+               values won't have been chosen with the right MV, but it's
+               probably not worth re-estimating them.*/
+            frags[fragi].qii=modes[mb_mode].qii[mapii];
+            frag_mvs[fragi][0]=(signed char)dx;
+            frag_mvs[fragi][1]=(signed char)dy;
+          }
+        }
+      }
+      oc_fr_state_flush_sb(pipe.fr+0);
+      sb_flags[sbi].coded_fully=pipe.fr[0].sb_full;
+      sb_flags[sbi].coded_partially=pipe.fr[0].sb_partial;
+    }
+    oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,0,notstart,notdone);
+    /*Code chroma planes.*/
+    for(pli=1;pli<3;pli++){
+      oc_enc_sb_transform_quantize_chroma(_enc,&pipe,
+       pli,pipe.sbi0[pli],pipe.sbi_end[pli]);
+      oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,pli,notstart,notdone);
+    }
+    notstart=1;
+  }
+  /*Finish filling in the reference frame borders.*/
+  refi=_enc->state.ref_frame_idx[OC_FRAME_SELF];
+  for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_enc->state,refi,pli);
+  /*Finish adding flagging overhead costs to inter bit counts to determine if
+     we should have coded a key frame instead.*/
+  if(_allow_keyframe){
+    if(interbits>intrabits)return 1;
+    /*Technically the chroma plane counts are over-estimations, because they
+       don't account for continuing runs from the luma planes, but the
+       inaccuracy is small.*/
+    for(pli=0;pli<3;pli++)interbits+=pipe.fr[pli].bits<<OC_BIT_SCALE;
+    interbits+=OC_MINI(_enc->mv_bits[0],_enc->mv_bits[1])<<OC_BIT_SCALE;
+    interbits+=
+     _enc->chooser.scheme_bits[_enc->chooser.scheme_list[0]]<<OC_BIT_SCALE;
+    if(interbits>intrabits)return 1;
+  }
+  _enc->ncoded_mbis=ncoded_mbis;
+  /*Compact the coded fragment list.*/
+  {
+    ptrdiff_t ncoded_fragis;
+    ncoded_fragis=_enc->state.ncoded_fragis[0];
+    for(pli=1;pli<3;pli++){
+      memmove(_enc->state.coded_fragis+ncoded_fragis,
+       _enc->state.coded_fragis+_enc->state.fplanes[pli].froffset,
+       _enc->state.ncoded_fragis[pli]*sizeof(*_enc->state.coded_fragis));
+      ncoded_fragis+=_enc->state.ncoded_fragis[pli];
+    }
+    _enc->state.ntotal_coded_fragis=ncoded_fragis;
+  }
+  return 0;
+}
+
+#if defined(OC_COLLECT_METRICS)
+# include <stdio.h>
+# include <math.h>
+
+/*TODO: It may be helpful (for block-level quantizers especially) to separate
+   out the contributions from AC and DC into separate tables.*/
+
+# define OC_ZWEIGHT   (0.25)
+
+/*Folds one weighted (SATD, rate, RMSE) sample into a running set of metrics.
+  The weighted sums are kept alongside weighted second-order deviation terms,
+   which are updated incrementally against the means seen so far.
+  _metrics: The accumulator to update.
+  _w:       The weight of the new sample.
+  _satd:    The SATD of the sample.
+  _rate:    The rate of the sample, scaled by 1<<OC_BIT_SCALE.
+  _rmse:    The RMSE of the sample.*/
+static void oc_mode_metrics_add(oc_mode_metrics *_metrics,
+ double _w,int _satd,int _rate,double _rmse){
+  double prev_w;
+  double bits;
+  prev_w=_metrics->fragw;
+  /*Accumulate statistics without the scaling; this lets us change the scale
+     factor yet still use old data.*/
+  bits=ldexp(_rate,-OC_BIT_SCALE);
+  /*The deviation terms only make sense once there is a mean to compare
+     against, i.e., once some weight has been accumulated.*/
+  if(prev_w>0){
+    double satd_dev;
+    double rate_dev;
+    double rmse_dev;
+    double mix_w;
+    satd_dev=_satd-_metrics->satd/prev_w;
+    rate_dev=bits-_metrics->rate/prev_w;
+    rmse_dev=_rmse-_metrics->rmse/prev_w;
+    mix_w=prev_w*_w/(prev_w+_w);
+    _metrics->satd2+=satd_dev*satd_dev*mix_w;
+    _metrics->satdrate+=satd_dev*rate_dev*mix_w;
+    _metrics->rate2+=rate_dev*rate_dev*mix_w;
+    _metrics->satdrmse+=satd_dev*rmse_dev*mix_w;
+    _metrics->rmse2+=rmse_dev*rmse_dev*mix_w;
+  }
+  _metrics->fragw+=_w;
+  _metrics->satd+=_satd*_w;
+  _metrics->rate+=bits*_w;
+  _metrics->rmse+=_rmse*_w;
+}
+
+/*Combines several sets of metrics into one.
+  Sets with no accumulated weight (fragw<=0) are ignored; if every set is
+   empty the destination is zeroed.
+  _dst: Receives the merged metrics.
+  _src: The array of metrics to merge.
+  _n:   The number of entries in _src.*/
+static void oc_mode_metrics_merge(oc_mode_metrics *_dst,
+ const oc_mode_metrics *_src,int _n){
+  int first;
+  int si;
+  /*Locate the first set that actually contains data.*/
+  first=0;
+  while(first<_n&&_src[first].fragw<=0)first++;
+  if(first>=_n){
+    memset(_dst,0,sizeof(*_dst));
+    return;
+  }
+  /*Seed the result with it...*/
+  memcpy(_dst,_src+first,sizeof(*_dst));
+  /*...then fold in every later set that contains data.*/
+  for(si=first+1;si<_n;si++){
+    const oc_mode_metrics *cur;
+    double                 dst_w;
+    double                 cur_w;
+    double                 satd_dev;
+    double                 rate_dev;
+    double                 rmse_dev;
+    double                 mix_w;
+    cur=_src+si;
+    if(!(cur->fragw>0))continue;
+    dst_w=_dst->fragw;
+    cur_w=cur->fragw;
+    /*Differences between the two means, taken before the sums change.*/
+    satd_dev=cur->satd/cur_w-_dst->satd/dst_w;
+    rate_dev=cur->rate/cur_w-_dst->rate/dst_w;
+    rmse_dev=cur->rmse/cur_w-_dst->rmse/dst_w;
+    mix_w=dst_w*cur_w/(dst_w+cur_w);
+    _dst->fragw+=cur->fragw;
+    _dst->satd+=cur->satd;
+    _dst->rate+=cur->rate;
+    _dst->rmse+=cur->rmse;
+    _dst->satd2+=cur->satd2+satd_dev*satd_dev*mix_w;
+    _dst->satdrate+=cur->satdrate+satd_dev*rate_dev*mix_w;
+    _dst->rate2+=cur->rate2+rate_dev*rate_dev*mix_w;
+    _dst->satdrmse+=cur->satdrmse+satd_dev*rmse_dev*mix_w;
+    _dst->rmse2+=cur->rmse2+rmse_dev*rmse_dev*mix_w;
+  }
+}
+
+/*Compile collected SATD/rate/RMSE metrics into a form that's immediately
+   useful for mode decision.
+  For every plane (pli) and every qti value this refreshes all bins of
+   OC_MODE_RD[_qi] from the raw accumulators in OC_MODE_METRICS[_qi].
+  _enc: The encoder context; only used here to restore the FPU state.
+  _qi:  The index of the OC_MODE_METRICS/OC_MODE_RD row to update.*/
+static void oc_enc_mode_metrics_update(oc_enc_ctx *_enc,int _qi){
+  int pli;
+  int qti;
+  oc_restore_fpu(&_enc->state);
+  /*Convert raw collected data into cleaned up sample points.*/
+  for(pli=0;pli<3;pli++){
+    for(qti=0;qti<2;qti++){
+      double fragw;
+      int    bin0;
+      int    bin1;
+      int    bin;
+      /*[bin0,bin1) is the window of source bins currently being merged, and
+         fragw is the weight accumulated over it.
+        Both persist from one destination bin to the next.*/
+      fragw=0;
+      bin0=bin1=0;
+      for(bin=0;bin<OC_SAD_BINS;bin++){
+        oc_mode_metrics metrics;
+        /*Default to zero; this is what remains if no line can be fit below.*/
+        OC_MODE_RD[_qi][pli][qti][bin].rate=0;
+        OC_MODE_RD[_qi][pli][qti][bin].rmse=0;
+        /*Find some points on either side of the current bin.*/
+        /*Grow the window to the right until it covers the current bin and
+           holds at least OC_ZWEIGHT weight.
+          NOTE: bin1 stops at OC_SAD_BINS-1, so the last source bin is never
+           added to the window.*/
+        while((bin1<bin+1||fragw<OC_ZWEIGHT)&&bin1<OC_SAD_BINS-1){
+          fragw+=OC_MODE_METRICS[_qi][pli][qti][bin1++].fragw;
+        }
+        /*Then drop source bins from the left, as long as bin0 stays below
+           both bin-1 and bin1-1 and the remaining weight does not fall under
+           OC_ZWEIGHT.*/
+        while(bin0+1<bin&&bin0+1<bin1&&
+         fragw-OC_MODE_METRICS[_qi][pli][qti][bin0].fragw>=OC_ZWEIGHT){
+          fragw-=OC_MODE_METRICS[_qi][pli][qti][bin0++].fragw;
+        }
+        /*Merge statistics and fit lines.*/
+        oc_mode_metrics_merge(&metrics,
+         OC_MODE_METRICS[_qi][pli][qti]+bin0,bin1-bin0);
+        /*A line can only be fit if there is data and the SATD values are not
+           all identical (satd2 is the divisor of the slope).*/
+        if(metrics.fragw>0&&metrics.satd2>0){
+          double a;
+          double b;
+          double msatd;
+          double mrate;
+          double mrmse;
+          double rate;
+          double rmse;
+          /*Weighted means of the merged samples.*/
+          msatd=metrics.satd/metrics.fragw;
+          mrate=metrics.rate/metrics.fragw;
+          mrmse=metrics.rmse/metrics.fragw;
+          /*Compute the points on these lines corresponding to the actual bin
+             value.*/
+          /*b is the slope and a the intercept of the rate-vs-SATD line, which
+             passes through the point of the weighted means.*/
+          b=metrics.satdrate/metrics.satd2;
+          a=mrate-b*msatd;
+          rate=ldexp(a+b*(bin<<OC_SAD_SHIFT),OC_BIT_SCALE);
+          /*Round and clamp to the ogg_int16_t range.*/
+          OC_MODE_RD[_qi][pli][qti][bin].rate=
+           (ogg_int16_t)OC_CLAMPI(-32768,(int)(rate+0.5),32767);
+          /*The same fit, this time for RMSE vs. SATD.*/
+          b=metrics.satdrmse/metrics.satd2;
+          a=mrmse-b*msatd;
+          rmse=ldexp(a+b*(bin<<OC_SAD_SHIFT),OC_RMSE_SCALE);
+          OC_MODE_RD[_qi][pli][qti][bin].rmse=
+           (ogg_int16_t)OC_CLAMPI(-32768,(int)(rmse+0.5),32767);
+        }
+      }
+    }
+  }
+}
+
+
+
+/*The following token skipping code used to also be used in the decoder (and
+   even at one point other places in the encoder).
+  However, it was obsoleted by other optimizations, and is now only used here.
+  It has been moved here to avoid generating the code when it's not needed.*/
+
+/*Determines the number of blocks or coefficients to be skipped for a given
+   token value.
+  This is the element type of OC_TOKEN_SKIP_TABLE.
+  _token:      The token value to skip.
+  _extra_bits: The extra bits attached to this token.
+  Return: A positive value indicates that number of coefficients are to be
+           skipped in the current block.
+          Otherwise, the negative of the return value indicates that number of
+           blocks are to be ended.*/
+typedef ptrdiff_t (*oc_token_skip_func)(int _token,int _extra_bits);
+
+/*Handles the simple end of block tokens.
+  _token:      The EOB token; it selects a base run length from a small
+                packed table.
+  _extra_bits: The extra bits attached to the token, added to the base.
+  Return: The negative of the number of blocks to end.*/
+static ptrdiff_t oc_token_skip_eob(int _token,int _extra_bits){
+  int base_nblocks;
+  base_nblocks=OC_UNIBBLE_TABLE32(0,1,2,3,7,15,0,0,_token)+1;
+  return -_extra_bits-base_nblocks;
+}
+
+/*The last EOB token has a special case, where an EOB run of size zero ends all
+   the remaining blocks in the frame.
+  _token:      Unused.
+  _extra_bits: The run length, or zero for "all remaining blocks".
+  Return: The negative of the number of blocks to end.*/
+static ptrdiff_t oc_token_skip_eob6(int _token,int _extra_bits){
+  ptrdiff_t nskip;
+  if(_extra_bits!=0)nskip=-_extra_bits;
+  /*Note: We want to return -PTRDIFF_MAX, but that requires C99, which is not
+     yet available everywhere; this should be equivalent.*/
+  else nskip=-(~(size_t)0>>1);
+  return nskip;
+}
+
+/*Handles the pure zero run tokens.
+  _token:      Unused.
+  _extra_bits: The extra bits attached to the token.
+  Return: The number of coefficients to skip, one more than _extra_bits.*/
+static ptrdiff_t oc_token_skip_zrl(int _token,int _extra_bits){
+  int ncoeffs;
+  ncoeffs=_extra_bits+1;
+  return ncoeffs;
+}
+
+/*Handles a normal coefficient value token.
+  The signature matches oc_token_skip_func, since this function is only ever
+   invoked through a pointer of that type; calling a function through a
+   pointer to an incompatible function type is undefined behavior.
+  _token:      Unused.
+  _extra_bits: Unused.
+  Return: Always 1 (a value token covers exactly one coefficient).*/
+static ptrdiff_t oc_token_skip_val(int _token,int _extra_bits){
+  return 1;
+}
+
+/*Handles a category 1A zero run/coefficient value combo token.
+  The signature matches oc_token_skip_func, since this function is only ever
+   invoked through a pointer of that type; calling a function through a
+   pointer to an incompatible function type is undefined behavior.
+  _token:      The token value; its offset from OC_DCT_RUN_CAT1A determines
+                the run length.
+  _extra_bits: Unused.
+  Return: The number of coefficients to skip.*/
+static ptrdiff_t oc_token_skip_run_cat1a(int _token,int _extra_bits){
+  return _token-OC_DCT_RUN_CAT1A+2;
+}
+
+/*Handles category 1b, 1c, 2a, and 2b zero run/coefficient value combo tokens.
+  _token:      The token value; its offset from OC_DCT_RUN_CAT1B selects the
+                entries used from the two packed tables below.
+  _extra_bits: The extra bits attached to the token; the selected mask picks
+                out the bits that contribute to the count.
+  Return: The number of coefficients to skip.*/
+static ptrdiff_t oc_token_skip_run(int _token,int _extra_bits){
+  int cati;
+  int eb_mask;
+  int base_ncoeffs;
+  cati=_token-OC_DCT_RUN_CAT1B;
+  eb_mask=OC_BYTE_TABLE32(3,7,0,1,cati);
+  base_ncoeffs=OC_BYTE_TABLE32(7,11,2,3,cati);
+  return (_extra_bits&eb_mask)+base_ncoeffs;
+}
+
+/*A jump table for computing the number of coefficients or blocks to skip for
+   a given token value.
+  This reduces all the conditional branches, etc., needed to parse these token
+   values down to one indirect jump.
+  The table is indexed directly by the token value and has TH_NDCT_TOKENS
+   entries.*/
+static const oc_token_skip_func OC_TOKEN_SKIP_TABLE[TH_NDCT_TOKENS]={
+  /*Tokens 0-5: simple end of block tokens.*/
+  oc_token_skip_eob,
+  oc_token_skip_eob,
+  oc_token_skip_eob,
+  oc_token_skip_eob,
+  oc_token_skip_eob,
+  oc_token_skip_eob,
+  /*Token 6: the last EOB token, with its special zero-length case.*/
+  oc_token_skip_eob6,
+  /*Tokens 7-8: pure zero runs.*/
+  oc_token_skip_zrl,
+  oc_token_skip_zrl,
+  /*Tokens 9-22: plain coefficient values.*/
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  /*Tokens 23-27: category 1A zero run/value combos.*/
+  (oc_token_skip_func)oc_token_skip_run_cat1a,
+  (oc_token_skip_func)oc_token_skip_run_cat1a,
+  (oc_token_skip_func)oc_token_skip_run_cat1a,
+  (oc_token_skip_func)oc_token_skip_run_cat1a,
+  (oc_token_skip_func)oc_token_skip_run_cat1a,
+  /*Tokens 28-31: category 1b, 1c, 2a and 2b zero run/value combos.*/
+  oc_token_skip_run,
+  oc_token_skip_run,
+  oc_token_skip_run,
+  oc_token_skip_run
+};
+
+/*Looks up and invokes the skip handler for a token.
+  _token:      The token value to skip; used directly as the index into
+                OC_TOKEN_SKIP_TABLE, without range checking.
+  _extra_bits: The extra bits attached to this token.
+  Return: A positive value indicates that number of coefficients are to be
+           skipped in the current block.
+          Otherwise, the negative of the return value indicates that number of
+           blocks are to be ended.
+          0 will never be returned, so that at least one coefficient in one
+           block will always be decoded for every token.*/
+static ptrdiff_t oc_dct_token_skip(int _token,int _extra_bits){
+  oc_token_skip_func skip_func;
+  skip_func=OC_TOKEN_SKIP_TABLE[_token];
+  return (*skip_func)(_token,_extra_bits);
+}
+
+
+
+/*Collects SATD/rate/RMSE statistics from the frame that was just encoded.
+  On the first call, previously saved statistics are loaded from the file
+   "modedec.stats" in the current directory, if it can be read in full.
+  For every coded fragment the bits spent on its DCT tokens are counted and
+   added, along with its SATD and SSD, to the OC_MODE_METRICS bin selected by
+   its quality index, plane, intra/inter status and SATD.
+  Afterwards the OC_MODE_RD tables of the quality indices used by this frame
+   are regenerated.
+  _enc: The encoder context.*/
+void oc_enc_mode_metrics_collect(oc_enc_ctx *_enc){
+  /*The offset added to the Huffman table group index for each zig-zag
+     position.*/
+  static const unsigned char OC_ZZI_HUFF_OFFSET[64]={
+     0,16,16,16,16,16,32,32,
+    32,32,32,32,32,32,32,48,
+    48,48,48,48,48,48,48,48,
+    48,48,48,48,64,64,64,64,
+    64,64,64,64,64,64,64,64,
+    64,64,64,64,64,64,64,64,
+    64,64,64,64,64,64,64,64
+  };
+  const oc_fragment *frags;
+  const unsigned    *frag_satd;
+  const unsigned    *frag_ssd;
+  const ptrdiff_t   *coded_fragis;
+  ptrdiff_t          ncoded_fragis;
+  ptrdiff_t          fragii;
+  double             fragw;
+  int                qti;
+  int                qii;
+  int                qi;
+  int                pli;
+  int                zzi;
+  int                token;
+  int                eb;
+  oc_restore_fpu(&_enc->state);
+  /*Load any existing mode metrics if we haven't already.*/
+  if(!oc_has_mode_metrics){
+    FILE *fmetrics;
+    memset(OC_MODE_METRICS,0,sizeof(OC_MODE_METRICS));
+    fmetrics=fopen("modedec.stats","rb");
+    if(fmetrics!=NULL){
+      /*A failed or short read would leave partially filled (and thus
+         inconsistent) statistics behind; start from scratch instead.*/
+      if(fread(OC_MODE_METRICS,sizeof(OC_MODE_METRICS),1,fmetrics)<1){
+        memset(OC_MODE_METRICS,0,sizeof(OC_MODE_METRICS));
+      }
+      fclose(fmetrics);
+    }
+    for(qi=0;qi<64;qi++)oc_enc_mode_metrics_update(_enc,qi);
+    oc_has_mode_metrics=1;
+  }
+  qti=_enc->state.frame_type;
+  frags=_enc->state.frags;
+  frag_satd=_enc->frag_satd;
+  frag_ssd=_enc->frag_ssd;
+  coded_fragis=_enc->state.coded_fragis;
+  ncoded_fragis=fragii=0;
+  /*Weight the fragments by the inverse frame size; this prevents HD content
+     from dominating the statistics.*/
+  fragw=1.0/_enc->state.nfrags;
+  for(pli=0;pli<3;pli++){
+    ptrdiff_t ti[64];
+    int       eob_token[64];
+    /*Run lengths are kept as ptrdiff_t, the type oc_dct_token_skip() returns,
+       so that the "end all remaining blocks" value is not truncated.*/
+    ptrdiff_t eob_run[64];
+    /*Set up token indices and eob run counts.
+      We don't bother trying to figure out the real cost of the runs that span
+       coefficients; instead we use the costs that were available when R-D
+       token optimization was done.*/
+    for(zzi=0;zzi<64;zzi++){
+      ti[zzi]=_enc->dct_token_offs[pli][zzi];
+      if(ti[zzi]>0){
+        token=_enc->dct_tokens[pli][zzi][0];
+        eb=_enc->extra_bits[pli][zzi][0];
+        eob_token[zzi]=token;
+        eob_run[zzi]=-oc_dct_token_skip(token,eb);
+      }
+      else{
+        eob_token[zzi]=OC_NDCT_EOB_TOKEN_MAX;
+        eob_run[zzi]=0;
+      }
+    }
+    /*Scan the list of coded fragments for this plane.*/
+    ncoded_fragis+=_enc->state.ncoded_fragis[pli];
+    for(;fragii<ncoded_fragis;fragii++){
+      ptrdiff_t    fragi;
+      ogg_uint32_t frag_bits;
+      int          huffi;
+      ptrdiff_t    skip;
+      int          mb_mode;
+      unsigned     satd;
+      int          bin;
+      fragi=coded_fragis[fragii];
+      frag_bits=0;
+      for(zzi=0;zzi<64;){
+        if(eob_run[zzi]>0){
+          /*We've reached the end of the block.*/
+          eob_run[zzi]--;
+          break;
+        }
+        huffi=_enc->huff_idxs[qti][zzi>0][pli+1>>1]
+         +OC_ZZI_HUFF_OFFSET[zzi];
+        if(eob_token[zzi]<OC_NDCT_EOB_TOKEN_MAX){
+          /*This token caused an EOB run to be flushed.
+            Therefore it gets the bits associated with it.*/
+          frag_bits+=_enc->huff_codes[huffi][eob_token[zzi]].nbits
+           +OC_DCT_TOKEN_EXTRA_BITS[eob_token[zzi]];
+          eob_token[zzi]=OC_NDCT_EOB_TOKEN_MAX;
+        }
+        token=_enc->dct_tokens[pli][zzi][ti[zzi]];
+        eb=_enc->extra_bits[pli][zzi][ti[zzi]];
+        ti[zzi]++;
+        skip=oc_dct_token_skip(token,eb);
+        if(skip<0){
+          /*An EOB token: remember it, and let the check at the top of the
+             loop end this block on the next iteration.*/
+          eob_token[zzi]=token;
+          eob_run[zzi]=-skip;
+        }
+        else{
+          /*A regular DCT value token; accumulate the bits for it.*/
+          frag_bits+=_enc->huff_codes[huffi][token].nbits
+           +OC_DCT_TOKEN_EXTRA_BITS[token];
+          zzi+=skip;
+        }
+      }
+      mb_mode=frags[fragi].mb_mode;
+      qi=_enc->state.qis[frags[fragi].qii];
+      /*The SATD is shifted left by two for planes 1 and 2 only.*/
+      satd=frag_satd[fragi]<<(pli+1&2);
+      bin=OC_MINI(satd>>OC_SAD_SHIFT,OC_SAD_BINS-1);
+      oc_mode_metrics_add(OC_MODE_METRICS[qi][pli][mb_mode!=OC_MODE_INTRA]+bin,
+       fragw,satd,frag_bits<<OC_BIT_SCALE,sqrt(frag_ssd[fragi]));
+    }
+  }
+  /*Update global SATD/rate/RMSE estimation matrix.*/
+  for(qii=0;qii<_enc->state.nqis;qii++){
+    oc_enc_mode_metrics_update(_enc,_enc->state.qis[qii]);
+  }
+}
+
+/*Saves the collected statistics and prints the derived tables.
+  The raw OC_MODE_METRICS data is written to the file "modedec.stats" in the
+   current directory; a warning is printed to stderr if that fails.
+  The OC_MODE_RD tables are regenerated for all 64 quality indices and printed
+   to stdout in the form of a C header.
+  _enc: The encoder context.*/
+void oc_enc_mode_metrics_dump(oc_enc_ctx *_enc){
+  FILE *fmetrics;
+  int   qi;
+  /*Generate sample points for complete list of QI values.*/
+  for(qi=0;qi<64;qi++)oc_enc_mode_metrics_update(_enc,qi);
+  fmetrics=fopen("modedec.stats","wb");
+  if(fmetrics!=NULL){
+    int failed;
+    failed=fwrite(OC_MODE_METRICS,sizeof(OC_MODE_METRICS),1,fmetrics)<1;
+    /*fclose() flushes any buffered data, so it can fail, too.*/
+    if(fclose(fmetrics)!=0)failed=1;
+    if(failed)fprintf(stderr,"Warning: failed to write modedec.stats.\n");
+  }
+  else fprintf(stderr,"Warning: could not open modedec.stats for writing.\n");
+  fprintf(stdout,
+   "/*File generated by libtheora with OC_COLLECT_METRICS"
+   " defined at compile time.*/\n"
+   "#if !defined(_modedec_H)\n"
+   "# define _modedec_H (1)\n"
+   "\n"
+   "\n"
+   "\n"
+   "# if defined(OC_COLLECT_METRICS)\n"
+   "typedef struct oc_mode_metrics oc_mode_metrics;\n"
+   "# endif\n"
+   "typedef struct oc_mode_rd      oc_mode_rd;\n"
+   "\n"
+   "\n"
+   "\n"
+   "/*The number of extra bits of precision at which to store rate"
+   " metrics.*/\n"
+   "# define OC_BIT_SCALE  (%i)\n"
+   "/*The number of extra bits of precision at which to store RMSE metrics.\n"
+   "  This must be at least half OC_BIT_SCALE (rounded up).*/\n"
+   "# define OC_RMSE_SCALE (%i)\n"
+   "/*The number of bins to partition statistics into.*/\n"
+   "# define OC_SAD_BINS   (%i)\n"
+   "/*The number of bits of precision to drop"
+   " from SAD scores to assign them to a\n"
+   "   bin.*/\n"
+   "# define OC_SAD_SHIFT  (%i)\n"
+   "\n"
+   "\n"
+   "\n"
+   "# if defined(OC_COLLECT_METRICS)\n"
+   "struct oc_mode_metrics{\n"
+   "  double fragw;\n"
+   "  double satd;\n"
+   "  double rate;\n"
+   "  double rmse;\n"
+   "  double satd2;\n"
+   "  double satdrate;\n"
+   "  double rate2;\n"
+   "  double satdrmse;\n"
+   "  double rmse2;\n"
+   "};\n"
+   "\n"
+   "\n"
+   "int             oc_has_mode_metrics;\n"
+   "oc_mode_metrics OC_MODE_METRICS[64][3][2][OC_SAD_BINS];\n"
+   "# endif\n"
+   "\n"
+   "\n"
+   "\n"
+   "struct oc_mode_rd{\n"
+   "  ogg_int16_t rate;\n"
+   "  ogg_int16_t rmse;\n"
+   "};\n"
+   "\n"
+   "\n"
+   "# if !defined(OC_COLLECT_METRICS)\n"
+   "static const\n"
+   "# endif\n"
+   "oc_mode_rd OC_MODE_RD[64][3][2][OC_SAD_BINS]={\n",
+   OC_BIT_SCALE,OC_RMSE_SCALE,OC_SAD_BINS,OC_SAD_SHIFT);
+  /*Emit the initializer, nested by quality index, plane and qti, with four
+     {rate,rmse} pairs per line.*/
+  for(qi=0;qi<64;qi++){
+    int pli;
+    fprintf(stdout,"  {\n");
+    for(pli=0;pli<3;pli++){
+      int qti;
+      fprintf(stdout,"    {\n");
+      for(qti=0;qti<2;qti++){
+        int bin;
+        static const char *pl_names[3]={"Y'","Cb","Cr"};
+        static const char *qti_names[2]={"INTRA","INTER"};
+        fprintf(stdout,"      /*%s  qi=%i  %s*/\n",
+         pl_names[pli],qi,qti_names[qti]);
+        fprintf(stdout,"      {\n");
+        fprintf(stdout,"        ");
+        for(bin=0;bin<OC_SAD_BINS;bin++){
+          if(bin&&!(bin&0x3))fprintf(stdout,"\n        ");
+          fprintf(stdout,"{%5i,%5i}",
+           OC_MODE_RD[qi][pli][qti][bin].rate,
+           OC_MODE_RD[qi][pli][qti][bin].rmse);
+          /*Separate all but the last element with commas.*/
+          if(bin+1<OC_SAD_BINS)fprintf(stdout,",");
+        }
+        fprintf(stdout,"\n      }");
+        if(qti<1)fprintf(stdout,",");
+        fprintf(stdout,"\n");
+      }
+      fprintf(stdout,"    }");
+      if(pli<2)fprintf(stdout,",");
+      fprintf(stdout,"\n");
+    }
+    fprintf(stdout,"  }");
+    if(qi<63)fprintf(stdout,",");
+    fprintf(stdout,"\n");
+  }
+  fprintf(stdout,
+   "};\n"
+   "\n"
+   "#endif\n");
+}
+#endif

+ 10 - 10
Engine/lib/libtheora/lib/dec/apiwrapper.c → Engine/lib/libtheora/lib/apiwrapper.c

@@ -5,13 +5,13 @@
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  *                                                                  *
  *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  *                                                                  *
  *                                                                  *
  ********************************************************************
  ********************************************************************
 
 
   function:
   function:
-    last mod: $Id: apiwrapper.c 15400 2008-10-15 12:10:58Z tterribe $
+    last mod: $Id: apiwrapper.c 16503 2009-08-22 18:14:02Z giles $
 
 
  ********************************************************************/
  ********************************************************************/
 
 
@@ -47,10 +47,10 @@ void theora_info_clear(theora_info *_ci){
 void theora_clear(theora_state *_th){
 void theora_clear(theora_state *_th){
   /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
   /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
   if(_th->internal_decode!=NULL){
   if(_th->internal_decode!=NULL){
-    (*((oc_state_dispatch_vtbl *)_th->internal_decode)->clear)(_th);
+    (*((oc_state_dispatch_vtable *)_th->internal_decode)->clear)(_th);
   }
   }
   if(_th->internal_encode!=NULL){
   if(_th->internal_encode!=NULL){
-    (*((oc_state_dispatch_vtbl *)_th->internal_encode)->clear)(_th);
+    (*((oc_state_dispatch_vtable *)_th->internal_encode)->clear)(_th);
   }
   }
   if(_th->i!=NULL)theora_info_clear(_th->i);
   if(_th->i!=NULL)theora_info_clear(_th->i);
   memset(_th,0,sizeof(*_th));
   memset(_th,0,sizeof(*_th));
@@ -59,11 +59,11 @@ void theora_clear(theora_state *_th){
 int theora_control(theora_state *_th,int _req,void *_buf,size_t _buf_sz){
 int theora_control(theora_state *_th,int _req,void *_buf,size_t _buf_sz){
   /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
   /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
   if(_th->internal_decode!=NULL){
   if(_th->internal_decode!=NULL){
-    return (*((oc_state_dispatch_vtbl *)_th->internal_decode)->control)(_th,
+    return (*((oc_state_dispatch_vtable *)_th->internal_decode)->control)(_th,
      _req,_buf,_buf_sz);
      _req,_buf,_buf_sz);
   }
   }
   else if(_th->internal_encode!=NULL){
   else if(_th->internal_encode!=NULL){
-    return (*((oc_state_dispatch_vtbl *)_th->internal_encode)->control)(_th,
+    return (*((oc_state_dispatch_vtable *)_th->internal_encode)->control)(_th,
      _req,_buf,_buf_sz);
      _req,_buf,_buf_sz);
   }
   }
   else return TH_EINVAL;
   else return TH_EINVAL;
@@ -72,11 +72,11 @@ int theora_control(theora_state *_th,int _req,void *_buf,size_t _buf_sz){
 ogg_int64_t theora_granule_frame(theora_state *_th,ogg_int64_t _gp){
 ogg_int64_t theora_granule_frame(theora_state *_th,ogg_int64_t _gp){
   /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
   /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
   if(_th->internal_decode!=NULL){
   if(_th->internal_decode!=NULL){
-    return (*((oc_state_dispatch_vtbl *)_th->internal_decode)->granule_frame)(
+    return (*((oc_state_dispatch_vtable *)_th->internal_decode)->granule_frame)(
      _th,_gp);
      _th,_gp);
   }
   }
   else if(_th->internal_encode!=NULL){
   else if(_th->internal_encode!=NULL){
-    return (*((oc_state_dispatch_vtbl *)_th->internal_encode)->granule_frame)(
+    return (*((oc_state_dispatch_vtable *)_th->internal_encode)->granule_frame)(
      _th,_gp);
      _th,_gp);
   }
   }
   else return -1;
   else return -1;
@@ -85,11 +85,11 @@ ogg_int64_t theora_granule_frame(theora_state *_th,ogg_int64_t _gp){
 double theora_granule_time(theora_state *_th, ogg_int64_t _gp){
 double theora_granule_time(theora_state *_th, ogg_int64_t _gp){
   /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
   /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
   if(_th->internal_decode!=NULL){
   if(_th->internal_decode!=NULL){
-    return (*((oc_state_dispatch_vtbl *)_th->internal_decode)->granule_time)(
+    return (*((oc_state_dispatch_vtable *)_th->internal_decode)->granule_time)(
      _th,_gp);
      _th,_gp);
   }
   }
   else if(_th->internal_encode!=NULL){
   else if(_th->internal_encode!=NULL){
-    return (*((oc_state_dispatch_vtbl *)_th->internal_encode)->granule_time)(
+    return (*((oc_state_dispatch_vtable *)_th->internal_encode)->granule_time)(
      _th,_gp);
      _th,_gp);
   }
   }
   else return -1;
   else return -1;

+ 3 - 4
Engine/lib/libtheora/lib/dec/apiwrapper.h → Engine/lib/libtheora/lib/apiwrapper.h

@@ -5,7 +5,7 @@
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  *                                                                  *
  *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  *                                                                  *
  *                                                                  *
  ********************************************************************
  ********************************************************************
@@ -20,9 +20,8 @@
 # include <ogg/ogg.h>
 # include <ogg/ogg.h>
 # include <theora/theora.h>
 # include <theora/theora.h>
 # include "theora/theoradec.h"
 # include "theora/theoradec.h"
-/*# include "theora/theoraenc.h"*/
-typedef struct th_enc_ctx th_enc_ctx;
-# include "../internal.h"
+# include "theora/theoraenc.h"
+# include "internal.h"
 
 
 typedef struct th_api_wrapper th_api_wrapper;
 typedef struct th_api_wrapper th_api_wrapper;
 typedef struct th_api_info    th_api_info;
 typedef struct th_api_info    th_api_info;

+ 111 - 0
Engine/lib/libtheora/lib/bitpack.c

@@ -0,0 +1,111 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2009             *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function: packing variable sized words into an octet stream
+  last mod: $Id: bitpack.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+#include <string.h>
+#include <stdlib.h>
+#include "bitpack.h"
+
+/*We're 'MSb' endian; if we write a word but read individual bits,
+   then we'll read the MSb first.*/
+/*Prepares _b to read the _bytes bytes that start at _buf.
+  The whole structure is zeroed first, so the bit window, the count of
+   buffered bits and the eof flag all start out as 0; ptr is then set to the
+   first input byte and stop to one past the last.*/
+void oc_pack_readinit(oc_pack_buf *_b,unsigned char *_buf,long _bytes){
+  memset(_b,0,sizeof(*_b));
+  _b->ptr=_buf;
+  _b->stop=_buf+_bytes;
+}
+/*Tops up the bit window from the input buffer and returns the new window.
+  Whole bytes are loaded, each one placed directly below the bits already
+   buffered, for as long as the window has room for 8 more bits and input
+   remains.
+  _b->ptr and _b->bits are updated here, but _b->window is not: the caller has
+   to keep or store the returned value itself.
+  _bits is the number of bits the caller needs to have available.
+  If the window still holds fewer than that after loading whole bytes:
+   - at the end of the input, eof is set and the bit count is replaced by
+      OC_LOTS_OF_BITS;
+   - otherwise the leading bits of the next byte are merged into the low end
+      of the window, without advancing ptr or changing the bit count.*/
+static oc_pb_window oc_pack_refill(oc_pack_buf *_b,int _bits){
+  const unsigned char *ptr;
+  const unsigned char *stop;
+  oc_pb_window         window;
+  int                  available;
+  window=_b->window;
+  available=_b->bits;
+  ptr=_b->ptr;
+  stop=_b->stop;
+  while(available<=OC_PB_WINDOW_SIZE-8&&ptr<stop){
+    available+=8;
+    window|=(oc_pb_window)*ptr++<<OC_PB_WINDOW_SIZE-available;
+  }
+  _b->ptr=ptr;
+  if(_bits>available){
+    if(ptr>=stop){
+      _b->eof=1;
+      available=OC_LOTS_OF_BITS;
+    }
+    else window|=*ptr>>(available&7);
+  }
+  _b->bits=available;
+  return window;
+}
+/*Returns the next bit in the stream (0 or 1) without consuming it.
+  If no bits are buffered, the window is refilled and stored back into _b
+   first.*/
+int oc_pack_look1(oc_pack_buf *_b){
+  oc_pb_window window;
+  int          available;
+  window=_b->window;
+  available=_b->bits;
+  if(available<1)_b->window=window=oc_pack_refill(_b,1);
+  return window>>OC_PB_WINDOW_SIZE-1;
+}
+/*Discards one bit: shifts it out of the window and decrements the count of
+   buffered bits.
+  No refill is attempted here.
+  NOTE(review): presumably only called after oc_pack_look1() has made sure a
+   bit is buffered; confirm against the callers.*/
+void oc_pack_adv1(oc_pack_buf *_b){
+  _b->window<<=1;
+  _b->bits--;
+}
+
+/*Reads and consumes the next _bits bits, most significant bit first, and
+   returns them in the low _bits bits of the result.
+  Here we assume that 0<=_bits&&_bits<=32.
+  A request for 0 bits returns 0 right away, so the
+   window>>OC_PB_WINDOW_SIZE-_bits shift below is never asked to shift by the
+   full window width.
+  The window is refilled first if it holds fewer than _bits bits.
+  The consumed bits are then shifted out in two steps (by 1, then by _bits-1).
+  NOTE(review): presumably so that no single shift count can reach the window
+   width when _bits==32 and oc_pb_window is 32 bits wide; confirm.*/
+long oc_pack_read(oc_pack_buf *_b,int _bits){
+  oc_pb_window window;
+  int          available;
+  long         result;
+  window=_b->window;
+  available=_b->bits;
+  if(_bits==0)return 0;
+  if(available<_bits){
+    window=oc_pack_refill(_b,_bits);
+    available=_b->bits;
+  }
+  result=window>>OC_PB_WINDOW_SIZE-_bits;
+  available-=_bits;
+  window<<=1;
+  window<<=_bits-1;
+  _b->bits=available;
+  _b->window=window;
+  return result;
+}
+/*Reads and consumes a single bit, returning it as 0 or 1.
+  The window is refilled first if no bits are buffered; the bit is then taken
+   from the top of the window, which is shifted left by one.*/
+int oc_pack_read1(oc_pack_buf *_b){
+  oc_pb_window window;
+  int          available;
+  int          result;
+  window=_b->window;
+  available=_b->bits;
+  if(available<1){
+    window=oc_pack_refill(_b,1);
+    available=_b->bits;
+  }
+  result=window>>OC_PB_WINDOW_SIZE-1;
+  available--;
+  window<<=1;
+  _b->bits=available;
+  _b->window=window;
+  return result;
+}
+/*Returns -1 once the eof flag has been set.
+  Otherwise returns the number of input bytes not yet loaded into the window
+   plus the number of whole bytes' worth of bits still buffered in it.*/
+long oc_pack_bytes_left(oc_pack_buf *_b){
+  if(_b->eof)return -1;
+  return _b->stop-_b->ptr+(_b->bits>>3);
+}

+ 34 - 13
Engine/lib/libtheora/lib/dec/bitpack.h → Engine/lib/libtheora/lib/bitpack.h

@@ -5,7 +5,7 @@
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  *                                                                  *
  *                                                                  *
- * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2008             *
+ * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2009             *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  *                                                                  *
  *                                                                  *
  ********************************************************************
  ********************************************************************
@@ -16,23 +16,44 @@
  ********************************************************************/
  ********************************************************************/
 #if !defined(_bitpack_H)
 #if !defined(_bitpack_H)
 # define _bitpack_H (1)
 # define _bitpack_H (1)
-# include <ogg/ogg.h>
+# include <limits.h>
 
 
-void theorapackB_readinit(oggpack_buffer *_b,unsigned char *_buf,int _bytes);
-int theorapackB_look1(oggpack_buffer *_b,long *_ret);
-void theorapackB_adv1(oggpack_buffer *_b);
+
+
+typedef unsigned long      oc_pb_window;
+typedef struct oc_pack_buf oc_pack_buf;
+
+
+
+# define OC_PB_WINDOW_SIZE ((int)sizeof(oc_pb_window)*CHAR_BIT)
+/*This is meant to be a large, positive constant that can still be efficiently
+   loaded as an immediate (on platforms like ARM, for example).
+  Even relatively modest values like 100 would work fine.*/
+# define OC_LOTS_OF_BITS (0x40000000)
+
+
+/*State of the MSb-first bit reader.
+  window: Bits loaded from the input but not yet consumed; the next bit to be
+           read is the most significant one.
+  ptr:    The next input byte to load into the window.
+  stop:   One past the last byte of the input.
+  bits:   The number of bits currently buffered in the window (replaced by
+           OC_LOTS_OF_BITS once the end of the input has been hit).
+  eof:    Set to 1 once a read has asked for more bits than the input holds.*/
+struct oc_pack_buf{
+  oc_pb_window         window;
+  const unsigned char *ptr;
+  const unsigned char *stop;
+  int                  bits;
+  int                  eof;
+};
+
+void oc_pack_readinit(oc_pack_buf *_b,unsigned char *_buf,long _bytes);
+int oc_pack_look1(oc_pack_buf *_b);
+void oc_pack_adv1(oc_pack_buf *_b);
 /*Here we assume 0<=_bits&&_bits<=32.*/
 /*Here we assume 0<=_bits&&_bits<=32.*/
-int theorapackB_read(oggpack_buffer *_b,int _bits,long *_ret);
-int theorapackB_read1(oggpack_buffer *_b,long *_ret);
-long theorapackB_bytes(oggpack_buffer *_b);
-long theorapackB_bits(oggpack_buffer *_b);
-unsigned char *theorapackB_get_buffer(oggpack_buffer *_b);
+long oc_pack_read(oc_pack_buf *_b,int _bits);
+int oc_pack_read1(oc_pack_buf *_b);
+/* returns -1 for read beyond EOF, or the number of whole bytes available */
+long oc_pack_bytes_left(oc_pack_buf *_b);
 
 
 /*These two functions are implemented locally in huffdec.c*/
 /*These two functions are implemented locally in huffdec.c*/
 /*Read in bits without advancing the bitptr.
 /*Read in bits without advancing the bitptr.
   Here we assume 0<=_bits&&_bits<=32.*/
   Here we assume 0<=_bits&&_bits<=32.*/
-/*static int theorapackB_look(oggpack_buffer *_b,int _bits,long *_ret);*/
-/*static void theorapackB_adv(oggpack_buffer *_b,int _bits);*/
-
+/*static int oc_pack_look(oc_pack_buf *_b,int _bits);*/
+/*static void oc_pack_adv(oc_pack_buf *_b,int _bits);*/
 
 
 #endif
 #endif

+ 4 - 5
Engine/lib/libtheora/lib/cpu.c

@@ -5,7 +5,7 @@
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  *                                                                  *
  *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008                *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  *                                                                  *
  *                                                                  *
  ********************************************************************
  ********************************************************************
@@ -14,13 +14,13 @@
   Originally written by Rudolf Marek.
   Originally written by Rudolf Marek.
 
 
  function:
  function:
-  last mod: $Id: cpu.c 15427 2008-10-21 02:36:19Z xiphmont $
+  last mod: $Id: cpu.c 16503 2009-08-22 18:14:02Z giles $
 
 
  ********************************************************************/
  ********************************************************************/
 
 
 #include "cpu.h"
 #include "cpu.h"
 
 
-#if !defined(USE_ASM)
+#if !defined(OC_X86_ASM)
 static ogg_uint32_t oc_cpu_flags_get(void){
 static ogg_uint32_t oc_cpu_flags_get(void){
   return 0;
   return 0;
 }
 }
@@ -166,7 +166,7 @@ static ogg_uint32_t oc_cpu_flags_get(void){
   /*              D M A c          i t n e          h t u A*/
   /*              D M A c          i t n e          h t u A*/
   else if(ecx==0x444D4163&&edx==0x69746E65&&ebx==0x68747541||
   else if(ecx==0x444D4163&&edx==0x69746E65&&ebx==0x68747541||
    /*      C S N            y b   e          d o e G*/
    /*      C S N            y b   e          d o e G*/
-   ecx==0x43534E20&&edx==0x79622065&&ebx==0x646F6547){
+   ecx==0x43534e20&&edx==0x79622065&&ebx==0x646f6547){
     /*AMD, Geode:*/
     /*AMD, Geode:*/
     cpuid(0x80000000,eax,ebx,ecx,edx);
     cpuid(0x80000000,eax,ebx,ecx,edx);
     if(eax<0x80000001)flags=0;
     if(eax<0x80000001)flags=0;
@@ -192,7 +192,6 @@ static ogg_uint32_t oc_cpu_flags_get(void){
       The C3-2 (Nehemiah) cores appear to, as well.*/
       The C3-2 (Nehemiah) cores appear to, as well.*/
     cpuid(1,eax,ebx,ecx,edx);
     cpuid(1,eax,ebx,ecx,edx);
     flags=oc_parse_intel_flags(edx,ecx);
     flags=oc_parse_intel_flags(edx,ecx);
-    cpuid(0x80000000,eax,ebx,ecx,edx);
     if(eax>=0x80000001){
     if(eax>=0x80000001){
       /*The (non-Nehemiah) C3 processors support AMD-like cpuid info.
       /*The (non-Nehemiah) C3 processors support AMD-like cpuid info.
         We need to check this even if the Intel test succeeds to pick up 3DNow!
         We need to check this even if the Intel test succeeds to pick up 3DNow!

+ 2 - 2
Engine/lib/libtheora/lib/cpu.h

@@ -5,12 +5,12 @@
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  *                                                                  *
  *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  *                                                                  *
  *                                                                  *
  ********************************************************************
  ********************************************************************
  function:
  function:
-    last mod: $Id: cpu.h 15430 2008-10-21 05:03:55Z giles $
+    last mod: $Id: cpu.h 16503 2009-08-22 18:14:02Z giles $
 
 
  ********************************************************************/
  ********************************************************************/
 
 

+ 2 - 2
Engine/lib/libtheora/lib/dec/dct.h → Engine/lib/libtheora/lib/dct.h

@@ -5,13 +5,13 @@
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  *                                                                  *
  *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  *                                                                  *
  *                                                                  *
  ********************************************************************
  ********************************************************************
 
 
   function:
   function:
-  last mod: $Id: dct.h 15400 2008-10-15 12:10:58Z tterribe $
+  last mod: $Id: dct.h 16503 2009-08-22 18:14:02Z giles $
 
 
  ********************************************************************/
  ********************************************************************/
 
 

+ 0 - 121
Engine/lib/libtheora/lib/dec/bitpack.c

@@ -1,121 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2008             *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function: packing variable sized words into an octet stream
-  last mod: $Id: bitpack.c 15400 2008-10-15 12:10:58Z tterribe $
-
- ********************************************************************/
-
-/*We're 'MSb' endian; if we write a word but read individual bits,
-   then we'll read the MSb first.*/
-
-#include <string.h>
-#include <stdlib.h>
-#include "bitpack.h"
-
-void theorapackB_readinit(oggpack_buffer *_b,unsigned char *_buf,int _bytes){
-  memset(_b,0,sizeof(*_b));
-  _b->buffer=_b->ptr=_buf;
-  _b->storage=_bytes;
-}
-
-int theorapackB_look1(oggpack_buffer *_b,long *_ret){
-  if(_b->endbyte>=_b->storage){
-    *_ret=0L;
-    return -1;
-  }
-  *_ret=(_b->ptr[0]>>7-_b->endbit)&1;
-  return 0;
-}
-
-void theorapackB_adv1(oggpack_buffer *_b){
-  if(++(_b->endbit)>7){
-    _b->endbit=0;
-    _b->ptr++;
-    _b->endbyte++;
-  }
-}
-
-/*Here we assume that 0<=_bits&&_bits<=32.*/
-int theorapackB_read(oggpack_buffer *_b,int _bits,long *_ret){
-  long ret;
-  long m;
-  long d;
-  int fail;
-  m=32-_bits;
-  _bits+=_b->endbit;
-  d=_b->storage-_b->endbyte;
-  if(d<=4){
-    /*Not the main path.*/
-    if(d*8<_bits){
-      *_ret=0L;
-      fail=-1;
-      goto overflow;
-    }
-    /*Special case to avoid reading _b->ptr[0], which might be past the end of
-       the buffer; also skips some useless accounting.*/
-    else if(!_bits){
-      *_ret=0L;
-      return 0;
-    }
-  }
-  ret=_b->ptr[0]<<24+_b->endbit;
-  if(_bits>8){
-    ret|=_b->ptr[1]<<16+_b->endbit;
-    if(_bits>16){
-      ret|=_b->ptr[2]<<8+_b->endbit;
-      if(_bits>24){
-        ret|=_b->ptr[3]<<_b->endbit;
-        if(_bits>32)ret|=_b->ptr[4]>>8-_b->endbit;
-      }
-    }
-  }
-  *_ret=((ret&0xFFFFFFFFUL)>>(m>>1))>>(m+1>>1);
-  fail=0;
-overflow:
-  _b->ptr+=_bits>>3;
-  _b->endbyte+=_bits>>3;
-  _b->endbit=_bits&7;
-  return fail;
-}
-
-int theorapackB_read1(oggpack_buffer *_b,long *_ret){
-  int fail;
-  if(_b->endbyte>=_b->storage){
-    /*Not the main path.*/
-    *_ret=0L;
-    fail=-1;
-  }
-  else{
-    *_ret=(_b->ptr[0]>>7-_b->endbit)&1;
-    fail=0;
-  }
-  _b->endbit++;
-  if(_b->endbit>7){
-    _b->endbit=0;
-    _b->ptr++;
-    _b->endbyte++;
-  }
-  return fail;
-}
-
-long theorapackB_bytes(oggpack_buffer *_b){
-  return _b->endbyte+(_b->endbit+7>>3);
-}
-
-long theorapackB_bits(oggpack_buffer *_b){
-  return _b->endbyte*8+_b->endbit;
-}
-
-unsigned char *theorapackB_get_buffer(oggpack_buffer *_b){
-  return _b->buffer;
-}

+ 0 - 2057
Engine/lib/libtheora/lib/dec/decode.c

@@ -1,2057 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: decode.c 15403 2008-10-16 12:44:05Z tterribe $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include <string.h>
-#include <ogg/ogg.h>
-#include "decint.h"
-#if defined(OC_DUMP_IMAGES)
-# include <stdio.h>
-# include "png.h"
-#endif
-
-/*No post-processing.*/
-#define OC_PP_LEVEL_DISABLED  (0)
-/*Keep track of DC qi for each block only.*/
-#define OC_PP_LEVEL_TRACKDCQI (1)
-/*Deblock the luma plane.*/
-#define OC_PP_LEVEL_DEBLOCKY  (2)
-/*Dering the luma plane.*/
-#define OC_PP_LEVEL_DERINGY   (3)
-/*Stronger luma plane deringing.*/
-#define OC_PP_LEVEL_SDERINGY  (4)
-/*Deblock the chroma planes.*/
-#define OC_PP_LEVEL_DEBLOCKC  (5)
-/*Dering the chroma planes.*/
-#define OC_PP_LEVEL_DERINGC   (6)
-/*Stronger chroma plane deringing.*/
-#define OC_PP_LEVEL_SDERINGC  (7)
-/*Maximum valid post-processing level.*/
-#define OC_PP_LEVEL_MAX       (7)
-
-
-
-/*The mode alphabets for the various mode coding schemes.
-  Scheme 0 uses a custom alphabet, which is not stored in this table.*/
-static const int OC_MODE_ALPHABETS[7][OC_NMODES]={
-  /*Last MV dominates */
-  {
-    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
-    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
-    OC_MODE_INTER_MV_FOUR
-  },
-  {
-    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV,
-    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
-    OC_MODE_INTER_MV_FOUR
-  },
-  {
-    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2,
-    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
-    OC_MODE_INTER_MV_FOUR
-  },
-  {
-    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV,
-    OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,
-    OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR
-  },
-  /*No MV dominates.*/
-  {
-    OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,
-    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
-    OC_MODE_INTER_MV_FOUR
-  },
-  {
-    OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST,
-    OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV,
-    OC_MODE_INTER_MV_FOUR
-  },
-  /*Default ordering.*/
-  {
-    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST,
-    OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
-    OC_MODE_INTER_MV_FOUR
-  }
-};
-
-
-static int oc_sb_run_unpack(oggpack_buffer *_opb){
-  long bits;
-  int ret;
-  /*Coding scheme:
-       Codeword            Run Length
-     0                       1
-     10x                     2-3
-     110x                    4-5
-     1110xx                  6-9
-     11110xxx                10-17
-     111110xxxx              18-33
-     111111xxxxxxxxxxxx      34-4129*/
-  theorapackB_read1(_opb,&bits);
-  if(bits==0)return 1;
-  theorapackB_read(_opb,2,&bits);
-  if((bits&2)==0)return 2+(int)bits;
-  else if((bits&1)==0){
-    theorapackB_read1(_opb,&bits);
-    return 4+(int)bits;
-  }
-  theorapackB_read(_opb,3,&bits);
-  if((bits&4)==0)return 6+(int)bits;
-  else if((bits&2)==0){
-    ret=10+((bits&1)<<2);
-    theorapackB_read(_opb,2,&bits);
-    return ret+(int)bits;
-  }
-  else if((bits&1)==0){
-    theorapackB_read(_opb,4,&bits);
-    return 18+(int)bits;
-  }
-  theorapackB_read(_opb,12,&bits);
-  return 34+(int)bits;
-}
-
-static int oc_block_run_unpack(oggpack_buffer *_opb){
-  long bits;
-  long bits2;
-  /*Coding scheme:
-     Codeword             Run Length
-     0x                      1-2
-     10x                     3-4
-     110x                    5-6
-     1110xx                  7-10
-     11110xx                 11-14
-     11111xxxx               15-30*/
-  theorapackB_read(_opb,2,&bits);
-  if((bits&2)==0)return 1+(int)bits;
-  else if((bits&1)==0){
-    theorapackB_read1(_opb,&bits);
-    return 3+(int)bits;
-  }
-  theorapackB_read(_opb,2,&bits);
-  if((bits&2)==0)return 5+(int)bits;
-  else if((bits&1)==0){
-    theorapackB_read(_opb,2,&bits);
-    return 7+(int)bits;
-  }
-  theorapackB_read(_opb,3,&bits);
-  if((bits&4)==0)return 11+bits;
-  theorapackB_read(_opb,2,&bits2);
-  return 15+((bits&3)<<2)+bits2;
-}
-
-
-
-static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
- const th_setup_info *_setup){
-  int qti;
-  int pli;
-  int qi;
-  int ret;
-  ret=oc_state_init(&_dec->state,_info);
-  if(ret<0)return ret;
-  oc_huff_trees_copy(_dec->huff_tables,
-   (const oc_huff_node *const *)_setup->huff_tables);
-  for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
-    _dec->state.dequant_tables[qti][pli]=
-     _dec->state.dequant_table_data[qti][pli];
-  }
-  oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
-   &_setup->qinfo);
-  for(qi=0;qi<64;qi++){
-    int qsum;
-    qsum=0;
-    for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
-      qsum+=_dec->state.dequant_tables[qti][pli][qi][18]+
-       _dec->state.dequant_tables[qti][pli][qi][19]+
-       _dec->state.dequant_tables[qti][pli][qi][26]+
-       _dec->state.dequant_tables[qti][pli][qi][27]<<(pli==0);
-    }
-    _dec->pp_sharp_mod[qi]=-(qsum>>11);
-  }
-  _dec->dct_tokens=(unsigned char **)oc_calloc_2d(64,
-   _dec->state.nfrags,sizeof(_dec->dct_tokens[0][0]));
-  _dec->extra_bits=(ogg_uint16_t **)oc_calloc_2d(64,
-   _dec->state.nfrags,sizeof(_dec->extra_bits[0][0]));
-  memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
-   sizeof(_dec->state.loop_filter_limits));
-  _dec->pp_level=OC_PP_LEVEL_DISABLED;
-  _dec->dc_qis=NULL;
-  _dec->variances=NULL;
-  _dec->pp_frame_data=NULL;
-  _dec->stripe_cb.ctx=NULL;
-  _dec->stripe_cb.stripe_decoded=NULL;
-  return 0;
-}
-
-static void oc_dec_clear(oc_dec_ctx *_dec){
-  _ogg_free(_dec->pp_frame_data);
-  _ogg_free(_dec->variances);
-  _ogg_free(_dec->dc_qis);
-  oc_free_2d(_dec->extra_bits);
-  oc_free_2d(_dec->dct_tokens);
-  oc_huff_trees_clear(_dec->huff_tables);
-  oc_state_clear(&_dec->state);
-}
-
-
-static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
-  long val;
-  /*Check to make sure this is a data packet.*/
-  theorapackB_read1(&_dec->opb,&val);
-  if(val!=0)return TH_EBADPACKET;
-  /*Read in the frame type (I or P).*/
-  theorapackB_read1(&_dec->opb,&val);
-  _dec->state.frame_type=(int)val;
-  /*Read in the current qi.*/
-  theorapackB_read(&_dec->opb,6,&val);
-  _dec->state.qis[0]=(int)val;
-  theorapackB_read1(&_dec->opb,&val);
-  if(!val)_dec->state.nqis=1;
-  else{
-    theorapackB_read(&_dec->opb,6,&val);
-    _dec->state.qis[1]=(int)val;
-    theorapackB_read1(&_dec->opb,&val);
-    if(!val)_dec->state.nqis=2;
-    else{
-      theorapackB_read(&_dec->opb,6,&val);
-      _dec->state.qis[2]=(int)val;
-      _dec->state.nqis=3;
-    }
-  }
-  if(_dec->state.frame_type==OC_INTRA_FRAME){
-    /*Keyframes have 3 unused configuration bits, holdovers from VP3 days.
-      Most of the other unused bits in the VP3 headers were eliminated.
-      I don't know why these remain.*/
-    /* I wanted to eliminate wasted bits, but not all config wiggle room --Monty */
-    theorapackB_read(&_dec->opb,3,&val);
-    if(val!=0)return TH_EIMPL;
-  }
-  return 0;
-}
-
-/*Mark all fragments as coded and in OC_MODE_INTRA.
-  This also builds up the coded fragment list (in coded order), and clears the
-   uncoded fragment list.
-  It does not update the coded macro block list, as that is not used when
-   decoding INTRA frames.*/
-static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){
-  oc_sb *sb;
-  oc_sb *sb_end;
-  int    pli;
-  int    ncoded_fragis;
-  int    prev_ncoded_fragis;
-  prev_ncoded_fragis=ncoded_fragis=0;
-  sb=sb_end=_dec->state.sbs;
-  for(pli=0;pli<3;pli++){
-    const oc_fragment_plane *fplane;
-    fplane=_dec->state.fplanes+pli;
-    sb_end+=fplane->nsbs;
-    for(;sb<sb_end;sb++){
-      int quadi;
-      for(quadi=0;quadi<4;quadi++)if(sb->quad_valid&1<<quadi){
-        int bi;
-        for(bi=0;bi<4;bi++){
-          int fragi;
-          fragi=sb->map[quadi][bi];
-          if(fragi>=0){
-            oc_fragment *frag;
-            frag=_dec->state.frags+fragi;
-            frag->coded=1;
-            frag->mbmode=OC_MODE_INTRA;
-            _dec->state.coded_fragis[ncoded_fragis++]=fragi;
-          }
-        }
-      }
-    }
-    _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
-    prev_ncoded_fragis=ncoded_fragis;
-    _dec->state.nuncoded_fragis[pli]=0;
-  }
-}
-
-/*Decodes the bit flags for whether or not each super block is partially coded
-   or not.
-  Return: The number of partially coded super blocks.*/
-static int oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
-  oc_sb *sb;
-  oc_sb *sb_end;
-  long   val;
-  int    flag;
-  int    npartial;
-  int    run_count;
-  theorapackB_read1(&_dec->opb,&val);
-  flag=(int)val;
-  sb=_dec->state.sbs;
-  sb_end=sb+_dec->state.nsbs;
-  run_count=npartial=0;
-  while(sb<sb_end){
-    int full_run;
-    run_count=oc_sb_run_unpack(&_dec->opb);
-    full_run=run_count>=4129;
-    do{
-      sb->coded_partially=flag;
-      sb->coded_fully=0;
-      npartial+=flag;
-      sb++;
-    }
-    while(--run_count>0&&sb<sb_end);
-    if(full_run&&sb<sb_end){
-      theorapackB_read1(&_dec->opb,&val);
-      flag=(int)val;
-    }
-    else flag=!flag;
-  }
-  /*TODO: run_count should be 0 here.
-    If it's not, we should issue a warning of some kind.*/
-  return npartial;
-}
-
-/*Decodes the bit flags for whether or not each non-partially-coded super
-   block is fully coded or not.
-  This function should only be called if there is at least one
-   non-partially-coded super block.
-  Return: The number of partially coded super blocks.*/
-static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
-  oc_sb *sb;
-  oc_sb *sb_end;
-  long   val;
-  int    flag;
-  int    run_count;
-  sb=_dec->state.sbs;
-  sb_end=sb+_dec->state.nsbs;
-  /*Skip partially coded super blocks.*/
-  for(;sb->coded_partially;sb++);
-  theorapackB_read1(&_dec->opb,&val);
-  flag=(int)val;
-  while(sb<sb_end){
-    int full_run;
-    run_count=oc_sb_run_unpack(&_dec->opb);
-    full_run=run_count>=4129;
-    for(;sb<sb_end;sb++){
-      if(sb->coded_partially)continue;
-      if(run_count--<=0)break;
-      sb->coded_fully=flag;
-    }
-    if(full_run&&sb<sb_end){
-      theorapackB_read1(&_dec->opb,&val);
-      flag=(int)val;
-    }
-    else flag=!flag;
-  }
-  /*TODO: run_count should be 0 here.
-    If it's not, we should issue a warning of some kind.*/
-}
-
-static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
-  oc_sb *sb;
-  oc_sb *sb_end;
-  long   val;
-  int    npartial;
-  int    pli;
-  int    flag;
-  int    run_count;
-  int    ncoded_fragis;
-  int    prev_ncoded_fragis;
-  int    nuncoded_fragis;
-  int    prev_nuncoded_fragis;
-  npartial=oc_dec_partial_sb_flags_unpack(_dec);
-  if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
-  if(npartial>0){
-    theorapackB_read1(&_dec->opb,&val);
-    flag=!(int)val;
-  }
-  else flag=0;
-  run_count=0;
-  prev_ncoded_fragis=ncoded_fragis=prev_nuncoded_fragis=nuncoded_fragis=0;
-  sb=sb_end=_dec->state.sbs;
-  for(pli=0;pli<3;pli++){
-    const oc_fragment_plane *fplane;
-    fplane=_dec->state.fplanes+pli;
-    sb_end+=fplane->nsbs;
-    for(;sb<sb_end;sb++){
-      int quadi;
-      for(quadi=0;quadi<4;quadi++)if(sb->quad_valid&1<<quadi){
-        int bi;
-        for(bi=0;bi<4;bi++){
-          int fragi;
-          fragi=sb->map[quadi][bi];
-          if(fragi>=0){
-            oc_fragment *frag;
-            frag=_dec->state.frags+fragi;
-            if(sb->coded_fully)frag->coded=1;
-            else if(!sb->coded_partially)frag->coded=0;
-            else{
-              if(run_count<=0){
-                run_count=oc_block_run_unpack(&_dec->opb);
-                flag=!flag;
-              }
-              run_count--;
-              frag->coded=flag;
-            }
-            if(frag->coded)_dec->state.coded_fragis[ncoded_fragis++]=fragi;
-            else *(_dec->state.uncoded_fragis-++nuncoded_fragis)=fragi;
-          }
-        }
-      }
-    }
-    _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
-    prev_ncoded_fragis=ncoded_fragis;
-    _dec->state.nuncoded_fragis[pli]=nuncoded_fragis-prev_nuncoded_fragis;
-    prev_nuncoded_fragis=nuncoded_fragis;
-  }
-  /*TODO: run_count should be 0 here.
-    If it's not, we should issue a warning of some kind.*/
-}
-
-
-
-typedef int (*oc_mode_unpack_func)(oggpack_buffer *_opb);
-
-static int oc_vlc_mode_unpack(oggpack_buffer *_opb){
-  long val;
-  int  i;
-  for(i=0;i<7;i++){
-    theorapackB_read1(_opb,&val);
-    if(!val)break;
-  }
-  return i;
-}
-
-static int oc_clc_mode_unpack(oggpack_buffer *_opb){
-  long val;
-  theorapackB_read(_opb,3,&val);
-  return (int)val;
-}
-
-/*Unpacks the list of macro block modes for INTER frames.*/
-static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
-  oc_mode_unpack_func  mode_unpack;
-  oc_mb               *mb;
-  oc_mb               *mb_end;
-  const int           *alphabet;
-  long                 val;
-  int                  scheme0_alphabet[8];
-  int                  mode_scheme;
-  theorapackB_read(&_dec->opb,3,&val);
-  mode_scheme=(int)val;
-  if(mode_scheme==0){
-    int mi;
-    /*Just in case, initialize the modes to something.
-      If the bitstream doesn't contain each index exactly once, it's likely
-       corrupt and the rest of the packet is garbage anyway, but this way we
-       won't crash, and we'll decode SOMETHING.*/
-    /*LOOP VECTORIZES.*/
-    for(mi=0;mi<OC_NMODES;mi++)scheme0_alphabet[mi]=OC_MODE_INTER_NOMV;
-    for(mi=0;mi<OC_NMODES;mi++){
-      theorapackB_read(&_dec->opb,3,&val);
-      scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi];
-    }
-    alphabet=scheme0_alphabet;
-  }
-  else alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
-  if(mode_scheme==7)mode_unpack=oc_clc_mode_unpack;
-  else mode_unpack=oc_vlc_mode_unpack;
-  mb=_dec->state.mbs;
-  mb_end=mb+_dec->state.nmbs;
-  for(;mb<mb_end;mb++){
-    if(mb->mode!=OC_MODE_INVALID){
-      int bi;
-      for(bi=0;bi<4;bi++){
-        int fragi;
-        fragi=mb->map[0][bi];
-        if(fragi>=0&&_dec->state.frags[fragi].coded)break;
-      }
-      if(bi<4)mb->mode=alphabet[(*mode_unpack)(&_dec->opb)];
-      else mb->mode=OC_MODE_INTER_NOMV;
-    }
-  }
-}
-
-
-
-typedef int (*oc_mv_comp_unpack_func)(oggpack_buffer *_opb);
-
-static int oc_vlc_mv_comp_unpack(oggpack_buffer *_opb){
-  long bits;
-  int  mvsigned[2];
-  theorapackB_read(_opb,3,&bits);
-  switch(bits){
-    case  0:return 0;
-    case  1:return 1;
-    case  2:return -1;
-    case  3:
-    case  4:{
-      mvsigned[0]=(int)(bits-1);
-      theorapackB_read1(_opb,&bits);
-    }break;
-    /*case  5:
-    case  6:
-    case  7:*/
-    default:{
-      mvsigned[0]=1<<bits-3;
-      theorapackB_read(_opb,bits-2,&bits);
-      mvsigned[0]+=(int)(bits>>1);
-      bits&=1;
-    }break;
-  }
-  mvsigned[1]=-mvsigned[0];
-  return mvsigned[bits];
-}
-
-static int oc_clc_mv_comp_unpack(oggpack_buffer *_opb){
-  long bits;
-  int  mvsigned[2];
-  theorapackB_read(_opb,6,&bits);
-  mvsigned[0]=bits>>1;
-  mvsigned[1]=-mvsigned[0];
-  return mvsigned[bits&1];
-}
-
-/*Unpacks the list of motion vectors for INTER frames, and propagtes the macro
-   block modes and motion vectors to the individual fragments.*/
-static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
-  oc_set_chroma_mvs_func  set_chroma_mvs;
-  oc_mv_comp_unpack_func  mv_comp_unpack;
-  oc_mb                  *mb;
-  oc_mb                  *mb_end;
-  const int              *map_idxs;
-  long                    val;
-  int                     map_nidxs;
-  oc_mv                   last_mv[2];
-  oc_mv                   cbmvs[4];
-  set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
-  theorapackB_read1(&_dec->opb,&val);
-  mv_comp_unpack=val?oc_clc_mv_comp_unpack:oc_vlc_mv_comp_unpack;
-  map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
-  map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
-  memset(last_mv,0,sizeof(last_mv));
-  mb=_dec->state.mbs;
-  mb_end=mb+_dec->state.nmbs;
-  for(;mb<mb_end;mb++)if(mb->mode!=OC_MODE_INVALID){
-    oc_fragment *frag;
-    oc_mv        mbmv;
-    int          coded[13];
-    int          codedi;
-    int          ncoded;
-    int          mapi;
-    int          mapii;
-    int          fragi;
-    int          mb_mode;
-    /*Search for at least one coded fragment.*/
-    ncoded=mapii=0;
-    do{
-      mapi=map_idxs[mapii];
-      fragi=mb->map[mapi>>2][mapi&3];
-      if(fragi>=0&&_dec->state.frags[fragi].coded)coded[ncoded++]=mapi;
-    }
-    while(++mapii<map_nidxs);
-    if(ncoded<=0)continue;
-    mb_mode=mb->mode;
-    switch(mb_mode){
-      case OC_MODE_INTER_MV_FOUR:{
-        oc_mv       lbmvs[4];
-        int         bi;
-        /*Mark the tail of the list, so we don't accidentally go past it.*/
-        coded[ncoded]=-1;
-        for(bi=codedi=0;bi<4;bi++){
-          if(coded[codedi]==bi){
-            codedi++;
-            frag=_dec->state.frags+mb->map[0][bi];
-            frag->mbmode=mb_mode;
-            frag->mv[0]=lbmvs[bi][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
-            frag->mv[1]=lbmvs[bi][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
-          }
-          else lbmvs[bi][0]=lbmvs[bi][1]=0;
-        }
-        if(codedi>0){
-          last_mv[1][0]=last_mv[0][0];
-          last_mv[1][1]=last_mv[0][1];
-          last_mv[0][0]=lbmvs[coded[codedi-1]][0];
-          last_mv[0][1]=lbmvs[coded[codedi-1]][1];
-        }
-        if(codedi<ncoded){
-          (*set_chroma_mvs)(cbmvs,(const oc_mv *)lbmvs);
-          for(;codedi<ncoded;codedi++){
-            mapi=coded[codedi];
-            bi=mapi&3;
-            frag=_dec->state.frags+mb->map[mapi>>2][bi];
-            frag->mbmode=mb_mode;
-            frag->mv[0]=cbmvs[bi][0];
-            frag->mv[1]=cbmvs[bi][1];
-          }
-        }
-      }break;
-      case OC_MODE_INTER_MV:{
-        last_mv[1][0]=last_mv[0][0];
-        last_mv[1][1]=last_mv[0][1];
-        mbmv[0]=last_mv[0][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
-        mbmv[1]=last_mv[0][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
-      }break;
-      case OC_MODE_INTER_MV_LAST:{
-        mbmv[0]=last_mv[0][0];
-        mbmv[1]=last_mv[0][1];
-      }break;
-      case OC_MODE_INTER_MV_LAST2:{
-        mbmv[0]=last_mv[1][0];
-        mbmv[1]=last_mv[1][1];
-        last_mv[1][0]=last_mv[0][0];
-        last_mv[1][1]=last_mv[0][1];
-        last_mv[0][0]=mbmv[0];
-        last_mv[0][1]=mbmv[1];
-      }break;
-      case OC_MODE_GOLDEN_MV:{
-        mbmv[0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
-        mbmv[1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
-      }break;
-      default:mbmv[0]=mbmv[1]=0;break;
-    }
-    /*4MV mode fills in the fragments itself.
-      For all other modes we can use this common code.*/
-    if(mb_mode!=OC_MODE_INTER_MV_FOUR){
-      for(codedi=0;codedi<ncoded;codedi++){
-        mapi=coded[codedi];
-        fragi=mb->map[mapi>>2][mapi&3];
-        frag=_dec->state.frags+fragi;
-        frag->mbmode=mb_mode;
-        frag->mv[0]=mbmv[0];
-        frag->mv[1]=mbmv[1];
-      }
-    }
-  }
-}
-
-static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
-  oc_fragment *frag;
-  int         *coded_fragi;
-  int         *coded_fragi_end;
-  int          ncoded_fragis;
-  ncoded_fragis=_dec->state.ncoded_fragis[0]+
-   _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
-  if(ncoded_fragis<=0)return;
-  coded_fragi=_dec->state.coded_fragis;
-  coded_fragi_end=coded_fragi+ncoded_fragis;
-  if(_dec->state.nqis==1){
-    /*If this frame has only a single qi value, then just set it in all coded
-       fragments.*/
-    while(coded_fragi<coded_fragi_end){
-      _dec->state.frags[*coded_fragi++].qi=_dec->state.qis[0];
-    }
-  }
-  else{
-    long val;
-    int  flag;
-    int  nqi1;
-    int  run_count;
-    /*Otherwise, we decode a qi index for each fragment, using two passes of
-      the same binary RLE scheme used for super-block coded bits.
-     The first pass marks each fragment as having a qii of 0 or greater than
-      0, and the second pass (if necessary), distinguishes between a qii of
-      1 and 2.
-     At first we just store the qii in the fragment.
-     After all the qii's are decoded, we make a final pass to replace them
-      with the corresponding qi's for this frame.*/
-    theorapackB_read1(&_dec->opb,&val);
-    flag=(int)val;
-    run_count=nqi1=0;
-    while(coded_fragi<coded_fragi_end){
-      int full_run;
-      run_count=oc_sb_run_unpack(&_dec->opb);
-      full_run=run_count>=4129;
-      do{
-        _dec->state.frags[*coded_fragi++].qi=flag;
-        nqi1+=flag;
-      }
-      while(--run_count>0&&coded_fragi<coded_fragi_end);
-      if(full_run&&coded_fragi<coded_fragi_end){
-        theorapackB_read1(&_dec->opb,&val);
-        flag=(int)val;
-      }
-      else flag=!flag;
-    }
-    /*TODO: run_count should be 0 here.
-      If it's not, we should issue a warning of some kind.*/
-    /*If we have 3 different qi's for this frame, and there was at least one
-       fragment with a non-zero qi, make the second pass.*/
-    if(_dec->state.nqis==3&&nqi1>0){
-      /*Skip qii==0 fragments.*/
-      for(coded_fragi=_dec->state.coded_fragis;
-       _dec->state.frags[*coded_fragi].qi==0;coded_fragi++);
-      theorapackB_read1(&_dec->opb,&val);
-      flag=(int)val;
-      while(coded_fragi<coded_fragi_end){
-        int full_run;
-        run_count=oc_sb_run_unpack(&_dec->opb);
-        full_run=run_count>=4129;
-        for(;coded_fragi<coded_fragi_end;coded_fragi++){
-          oc_fragment *frag;
-          frag=_dec->state.frags+*coded_fragi;
-          if(frag->qi==0)continue;
-          if(run_count--<=0)break;
-          frag->qi+=flag;
-        }
-        if(full_run&&coded_fragi<coded_fragi_end){
-          theorapackB_read1(&_dec->opb,&val);
-          flag=(int)val;
-        }
-        else flag=!flag;
-      }
-      /*TODO: run_count should be 0 here.
-        If it's not, we should issue a warning of some kind.*/
-    }
-    /*Finally, translate qii's to qi's.*/
-    for(coded_fragi=_dec->state.coded_fragis;coded_fragi<coded_fragi_end;
-     coded_fragi++){
-      frag=_dec->state.frags+*coded_fragi;
-      frag->qi=_dec->state.qis[frag->qi];
-    }
-  }
-}
-
-
-
-/*Returns the decoded value of the given token.
-  It CANNOT be called for any of the EOB tokens.
-  _token:      The token value to skip.
-  _extra_bits: The extra bits attached to this token.
-  Return: The decoded coefficient value.*/
-typedef int (*oc_token_dec1val_func)(int _token,int _extra_bits);
-
-/*Handles zero run tokens.*/
-static int oc_token_dec1val_zrl(void){
-  return 0;
-}
-
-/*Handles 1, -1, 2 and -2 tokens.*/
-static int oc_token_dec1val_const(int _token){
-  static const int CONST_VALS[4]={1,-1,2,-2};
-  return CONST_VALS[_token-OC_NDCT_ZRL_TOKEN_MAX];
-}
-
-/*Handles DCT value tokens category 2.*/
-static int oc_token_dec1val_cat2(int _token,int _extra_bits){
-  int valsigned[2];
-  valsigned[0]=_token-OC_DCT_VAL_CAT2+3;
-  valsigned[1]=-valsigned[0];
-  return valsigned[_extra_bits];
-}
-
-/*Handles DCT value tokens categories 3 through 8.*/
-static int oc_token_dec1val_cati(int _token,int _extra_bits){
-  static const int VAL_CAT_OFFS[6]={
-    OC_NDCT_VAL_CAT2_SIZE+3,
-    OC_NDCT_VAL_CAT2_SIZE+5,
-    OC_NDCT_VAL_CAT2_SIZE+9,
-    OC_NDCT_VAL_CAT2_SIZE+17,
-    OC_NDCT_VAL_CAT2_SIZE+33,
-    OC_NDCT_VAL_CAT2_SIZE+65
-  };
-  static const int VAL_CAT_MASKS[6]={
-    0x001,0x003,0x007,0x00F,0x01F,0x1FF
-  };
-  static const int VAL_CAT_SHIFTS[6]={1,2,3,4,5,9};
-  int valsigned[2];
-  int cati;
-  cati=_token-OC_NDCT_VAL_CAT2_MAX;
-  valsigned[0]=VAL_CAT_OFFS[cati]+(_extra_bits&VAL_CAT_MASKS[cati]);
-  valsigned[1]=-valsigned[0];
-  return valsigned[_extra_bits>>VAL_CAT_SHIFTS[cati]&1];
-}
-
-/*A jump table for compute the first coefficient value the given token value
-   represents.*/
-static const oc_token_dec1val_func OC_TOKEN_DEC1VAL_TABLE[TH_NDCT_TOKENS-
- OC_NDCT_EOB_TOKEN_MAX]={
-  (oc_token_dec1val_func)oc_token_dec1val_zrl,
-  (oc_token_dec1val_func)oc_token_dec1val_zrl,
-  (oc_token_dec1val_func)oc_token_dec1val_const,
-  (oc_token_dec1val_func)oc_token_dec1val_const,
-  (oc_token_dec1val_func)oc_token_dec1val_const,
-  (oc_token_dec1val_func)oc_token_dec1val_const,
-  oc_token_dec1val_cat2,
-  oc_token_dec1val_cat2,
-  oc_token_dec1val_cat2,
-  oc_token_dec1val_cat2,
-  oc_token_dec1val_cati,
-  oc_token_dec1val_cati,
-  oc_token_dec1val_cati,
-  oc_token_dec1val_cati,
-  oc_token_dec1val_cati,
-  oc_token_dec1val_cati,
-  (oc_token_dec1val_func)oc_token_dec1val_zrl,
-  (oc_token_dec1val_func)oc_token_dec1val_zrl,
-  (oc_token_dec1val_func)oc_token_dec1val_zrl,
-  (oc_token_dec1val_func)oc_token_dec1val_zrl,
-  (oc_token_dec1val_func)oc_token_dec1val_zrl,
-  (oc_token_dec1val_func)oc_token_dec1val_zrl,
-  (oc_token_dec1val_func)oc_token_dec1val_zrl,
-  (oc_token_dec1val_func)oc_token_dec1val_zrl,
-  (oc_token_dec1val_func)oc_token_dec1val_zrl
-};
-
-/*Returns the decoded value of the given token.
-  It CANNOT be called for any of the EOB tokens.
-  _token:      The token value to skip.
-  _extra_bits: The extra bits attached to this token.
-  Return: The decoded coefficient value.*/
-static int oc_dct_token_dec1val(int _token,int _extra_bits){
-  return (*OC_TOKEN_DEC1VAL_TABLE[_token-OC_NDCT_EOB_TOKEN_MAX])(_token,
-   _extra_bits);
-}
-
-/*Unpacks the DC coefficient tokens.
-  Unlike when unpacking the AC coefficient tokens, we actually need to decode
-   the DC coefficient values now so that we can do DC prediction.
-  _huff_idx:   The index of the Huffman table to use for each color plane.
-  _ntoks_left: The number of tokens left to be decoded in each color plane for
-                each coefficient.
-               This is updated as EOB tokens and zero run tokens are decoded.
-  Return: The length of any outstanding EOB run.*/
-static int oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[3],
- int _ntoks_left[3][64]){
-  long  val;
-  int  *coded_fragi;
-  int  *coded_fragi_end;
-  int   run_counts[64];
-  int   cfi;
-  int   eobi;
-  int   eobs;
-  int   ti;
-  int   ebi;
-  int   pli;
-  int   rli;
-  eobs=0;
-  ti=ebi=0;
-  coded_fragi_end=coded_fragi=_dec->state.coded_fragis;
-  for(pli=0;pli<3;pli++){
-    coded_fragi_end+=_dec->state.ncoded_fragis[pli];
-    memset(run_counts,0,sizeof(run_counts));
-    _dec->eob_runs[pli][0]=eobs;
-    /*Continue any previous EOB run, if there was one.*/
-    for(eobi=eobs;eobi-->0&&coded_fragi<coded_fragi_end;){
-      _dec->state.frags[*coded_fragi++].dc=0;
-    }
-    cfi=0;
-    while(eobs<_ntoks_left[pli][0]-cfi){
-      int token;
-      int neb;
-      int eb;
-      int skip;
-      cfi+=eobs;
-      run_counts[63]+=eobs;
-      token=oc_huff_token_decode(&_dec->opb,
-       _dec->huff_tables[_huff_idxs[pli]]);
-      _dec->dct_tokens[0][ti++]=(unsigned char)token;
-      neb=OC_DCT_TOKEN_EXTRA_BITS[token];
-      if(neb){
-        theorapackB_read(&_dec->opb,neb,&val);
-        eb=(int)val;
-        _dec->extra_bits[0][ebi++]=(ogg_uint16_t)eb;
-      }
-      else eb=0;
-      skip=oc_dct_token_skip(token,eb);
-      if(skip<0){
-        eobs=eobi=-skip;
-        while(eobi-->0&&coded_fragi<coded_fragi_end){
-          _dec->state.frags[*coded_fragi++].dc=0;
-        }
-      }
-      else{
-        run_counts[skip-1]++;
-        cfi++;
-        eobs=0;
-        _dec->state.frags[*coded_fragi++].dc=oc_dct_token_dec1val(token,eb);
-      }
-    }
-    _dec->ti0[pli][0]=ti;
-    _dec->ebi0[pli][0]=ebi;
-    /*Set the EOB count to the portion of the last EOB run which extends past
-       this coefficient.*/
-    eobs=eobs+cfi-_ntoks_left[pli][0];
-    /*Add the portion of the last EOB which was included in this coefficient to
-       to the longest run length.*/
-    run_counts[63]+=_ntoks_left[pli][0]-cfi;
-    /*And convert the run_counts array to a moment table.*/
-    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
-    /*Finally, subtract off the number of coefficients that have been
-       accounted for by runs started in this coefficient.*/
-    for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli];
-  }
-  return eobs;
-}
-
-/*Unpacks the AC coefficient tokens.
-  This can completely discard coefficient values while unpacking, and so is
-   somewhat simpler than unpacking the DC coefficient tokens.
-  _huff_idx:   The index of the Huffman table to use for each color plane.
-  _ntoks_left: The number of tokens left to be decoded in each color plane for
-                each coefficient.
-               This is updated as EOB tokens and zero run tokens are decoded.
-  _eobs:       The length of any outstanding EOB run from previous
-                coefficients.
-  Return: The length of any outstanding EOB run.*/
-static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[3],
- int _ntoks_left[3][64],int _eobs){
-  long val;
-  int  run_counts[64];
-  int  cfi;
-  int  ti;
-  int  ebi;
-  int  pli;
-  int  rli;
-  ti=ebi=0;
-  for(pli=0;pli<3;pli++){
-    memset(run_counts,0,sizeof(run_counts));
-    _dec->eob_runs[pli][_zzi]=_eobs;
-    cfi=0;
-    while(_eobs<_ntoks_left[pli][_zzi]-cfi){
-      int token;
-      int neb;
-      int eb;
-      int skip;
-      cfi+=_eobs;
-      run_counts[63]+=_eobs;
-      token=oc_huff_token_decode(&_dec->opb,
-       _dec->huff_tables[_huff_idxs[pli]]);
-      _dec->dct_tokens[_zzi][ti++]=(unsigned char)token;
-      neb=OC_DCT_TOKEN_EXTRA_BITS[token];
-      if(neb){
-        theorapackB_read(&_dec->opb,neb,&val);
-        eb=(int)val;
-        _dec->extra_bits[_zzi][ebi++]=(ogg_uint16_t)eb;
-      }
-      else eb=0;
-      skip=oc_dct_token_skip(token,eb);
-      if(skip<0)_eobs=-skip;
-      else{
-        run_counts[skip-1]++;
-        cfi++;
-        _eobs=0;
-      }
-    }
-    _dec->ti0[pli][_zzi]=ti;
-    _dec->ebi0[pli][_zzi]=ebi;
-    /*Set the EOB count to the portion of the last EOB run which extends past
-       this coefficient.*/
-    _eobs=_eobs+cfi-_ntoks_left[pli][_zzi];
-    /*Add the portion of the last EOB which was included in this coefficient to
-       to the longest run length.*/
-    run_counts[63]+=_ntoks_left[pli][_zzi]-cfi;
-    /*And convert the run_counts array to a moment table.*/
-    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
-    /*Finally, subtract off the number of coefficients that have been
-       accounted for by runs started in this coefficient.*/
-    for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli];
-  }
-  return _eobs;
-}
-
-/*Tokens describing the DCT coefficients that belong to each fragment are
-   stored in the bitstream grouped by coefficient, not by fragment.
-
-  This means that we either decode all the tokens in order, building up a
-   separate coefficient list for each fragment as we go, and then go back and
-   do the iDCT on each fragment, or we have to create separate lists of tokens
-   for each coefficient, so that we can pull the next token required off the
-   head of the appropriate list when decoding a specific fragment.
-
-  The former was VP3's choice, and it meant 2*w*h extra storage for all the
-   decoded coefficient values.
-
-  We take the second option, which lets us store just one or three bytes per
-   token (generally far fewer than the number of coefficients, due to EOB
-   tokens and zero runs), and which requires us to only maintain a counter for
-   each of the 64 coefficients, instead of a counter for every fragment to
-   determine where the next token goes.
-
-  Actually, we use 3 counters per coefficient, one for each color plane, so we
-   can decode all color planes simultaneously.
-
-  This lets color conversion, etc., be done as soon as a full MCU (one or
-   two super block rows) is decoded, while the image data is still in cache.*/
-
-static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){
-  static const int OC_HUFF_LIST_MAX[5]={1,6,15,28,64};
-  long val;
-  int  ntoks_left[3][64];
-  int  huff_idxs[3];
-  int  pli;
-  int  zzi;
-  int  hgi;
-  int  huffi_y;
-  int  huffi_c;
-  int  eobs;
-  for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){
-    ntoks_left[pli][zzi]=_dec->state.ncoded_fragis[pli];
-  }
-  theorapackB_read(&_dec->opb,4,&val);
-  huffi_y=(int)val;
-  theorapackB_read(&_dec->opb,4,&val);
-  huffi_c=(int)val;
-  huff_idxs[0]=huffi_y;
-  huff_idxs[1]=huff_idxs[2]=huffi_c;
-  _dec->eob_runs[0][0]=0;
-  eobs=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left);
-  theorapackB_read(&_dec->opb,4,&val);
-  huffi_y=(int)val;
-  theorapackB_read(&_dec->opb,4,&val);
-  huffi_c=(int)val;
-  zzi=1;
-  for(hgi=1;hgi<5;hgi++){
-    huff_idxs[0]=huffi_y+(hgi<<4);
-    huff_idxs[1]=huff_idxs[2]=huffi_c+(hgi<<4);
-    for(;zzi<OC_HUFF_LIST_MAX[hgi];zzi++){
-      eobs=oc_dec_ac_coeff_unpack(_dec,zzi,huff_idxs,ntoks_left,eobs);
-    }
-  }
-  /*TODO: eobs should be exactly zero, or 4096 or greater.
-    The second case occurs when an EOB run of size zero is encountered, which
-     gets treated as an infinite EOB run (where infinity is INT_MAX).
-    If neither of these conditions holds, then a warning should be issued.*/
-}
-
-
-
-/*Expands a single token into the given coefficient list.
-  This fills in the zeros for zero runs as well as coefficient values, and
-   updates the index of the current coefficient.
-  It CANNOT be called for any of the EOB tokens.
-  _token:      The token value to expand.
-  _extra_bits: The extra bits associated with the token.
-  _dct_coeffs: The current list of coefficients, in zig-zag order.
-  _zzi:        A pointer to the zig-zag index of the next coefficient to write
-                to.
-               This is updated before the function returns.*/
-typedef void (*oc_token_expand_func)(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int *_zzi);
-
-/*Expands a zero run token.*/
-static void oc_token_expand_zrl(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int *_zzi){
-  int zzi;
-  zzi=*_zzi;
-  do _dct_coeffs[zzi++]=0;
-  while(_extra_bits-->0);
-  *_zzi=zzi;
-}
-
-/*Expands a constant, single-value token.*/
-static void oc_token_expand_const(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int *_zzi){
-  _dct_coeffs[(*_zzi)++]=(ogg_int16_t)oc_token_dec1val_const(_token);
-}
-
-/*Expands category 2 single-valued tokens.*/
-static void oc_token_expand_cat2(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int *_zzi){
-  _dct_coeffs[(*_zzi)++]=
-   (ogg_int16_t)oc_token_dec1val_cat2(_token,_extra_bits);
-}
-
-/*Expands category 3 through 8 single-valued tokens.*/
-static void oc_token_expand_cati(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int *_zzi){
-  _dct_coeffs[(*_zzi)++]=
-   (ogg_int16_t)oc_token_dec1val_cati(_token,_extra_bits);
-}
-
-/*Expands a category 1a zero run/value combo token.*/
-static void oc_token_expand_run_cat1a(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int *_zzi){
-  int zzi;
-  int rl;
-  zzi=*_zzi;
-  /*LOOP VECTORIZES.*/
-  for(rl=_token-OC_DCT_RUN_CAT1A+1;rl-->0;)_dct_coeffs[zzi++]=0;
-  _dct_coeffs[zzi++]=(ogg_int16_t)(1-(_extra_bits<<1));
-  *_zzi=zzi;
-}
-
-/*Expands all other zero run/value combo tokens.*/
-static void oc_token_expand_run(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int *_zzi){
-  static const int NZEROS_ADJUST[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
-    6,10,1,2
-  };
-  static const int NZEROS_MASK[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
-    3,7,0,1
-  };
-  static const int VALUE_SHIFT[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
-    0,0,0,1
-  };
-  static const int VALUE_MASK[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
-    0,0,1,1
-  };
-  static const int VALUE_ADJUST[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
-    1,1,2,2
-  };
-  static const int SIGN_SHIFT[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
-    2,3,1,2
-  };
-  int valsigned[2];
-  int zzi;
-  int rl;
-  _token-=OC_DCT_RUN_CAT1B;
-  rl=(_extra_bits&NZEROS_MASK[_token])+NZEROS_ADJUST[_token];
-  zzi=*_zzi;
-  /*LOOP VECTORIZES.*/
-  while(rl-->0)_dct_coeffs[zzi++]=0;
-  valsigned[0]=VALUE_ADJUST[_token]+
-   (_extra_bits>>VALUE_SHIFT[_token]&VALUE_MASK[_token]);
-  valsigned[1]=-valsigned[0];
-  _dct_coeffs[zzi++]=(ogg_int16_t)valsigned[
-   _extra_bits>>SIGN_SHIFT[_token]];
-  *_zzi=zzi;
-}
-
-/*A jump table for expanding token values into coefficient values.
-  This reduces all the conditional branches, etc., needed to parse these token
-   values down to one indirect jump.*/
-static const oc_token_expand_func OC_TOKEN_EXPAND_TABLE[TH_NDCT_TOKENS-
- OC_NDCT_EOB_TOKEN_MAX]={
-  oc_token_expand_zrl,
-  oc_token_expand_zrl,
-  oc_token_expand_const,
-  oc_token_expand_const,
-  oc_token_expand_const,
-  oc_token_expand_const,
-  oc_token_expand_cat2,
-  oc_token_expand_cat2,
-  oc_token_expand_cat2,
-  oc_token_expand_cat2,
-  oc_token_expand_cati,
-  oc_token_expand_cati,
-  oc_token_expand_cati,
-  oc_token_expand_cati,
-  oc_token_expand_cati,
-  oc_token_expand_cati,
-  oc_token_expand_run_cat1a,
-  oc_token_expand_run_cat1a,
-  oc_token_expand_run_cat1a,
-  oc_token_expand_run_cat1a,
-  oc_token_expand_run_cat1a,
-  oc_token_expand_run,
-  oc_token_expand_run,
-  oc_token_expand_run,
-  oc_token_expand_run
-};
-
-/*Expands a single token into the given coefficient list.
-  This fills in the zeros for zero runs as well as coefficient values, and
-   updates the index of the current coefficient.
-  It CANNOT be called for any of the EOB tokens.
-  _token:      The token value to expand.
-  _extra_bits: The extra bits associated with the token.
-  _dct_coeffs: The current list of coefficients, in zig-zag order.
-  _zzi:        A pointer to the zig-zag index of the next coefficient to write
-                to.
-               This is updated before the function returns.*/
-static void oc_dct_token_expand(int _token,int _extra_bits,
- ogg_int16_t *_dct_coeffs,int *_zzi){
-  (*OC_TOKEN_EXPAND_TABLE[_token-OC_NDCT_EOB_TOKEN_MAX])(_token,
-   _extra_bits,_dct_coeffs,_zzi);
-}
-
-
-
-static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
-  /*pp_level 0: disabled; free any memory used and return*/
-  if(_dec->pp_level<=OC_PP_LEVEL_DISABLED){
-    if(_dec->dc_qis!=NULL){
-      _ogg_free(_dec->dc_qis);
-      _dec->dc_qis=NULL;
-      _ogg_free(_dec->variances);
-      _dec->variances=NULL;
-      _ogg_free(_dec->pp_frame_data);
-      _dec->pp_frame_data=NULL;
-    }
-    return 1;
-  }
-  if(_dec->dc_qis==NULL){
-    /*If we haven't been tracking DC quantization indices, there's no point in
-       starting now.*/
-    if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1;
-    _dec->dc_qis=(unsigned char *)_ogg_malloc(
-     _dec->state.nfrags*sizeof(_dec->dc_qis[0]));
-    memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags);
-  }
-  else{
-    int           *coded_fragi;
-    int           *coded_fragi_end;
-    unsigned char  qi0;
-    /*Update the DC quantization index of each coded block.*/
-    qi0=(unsigned char)_dec->state.qis[0];
-    coded_fragi_end=_dec->state.coded_fragis+_dec->state.ncoded_fragis[0]+
-     _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
-    for(coded_fragi=_dec->state.coded_fragis;coded_fragi<coded_fragi_end;
-     coded_fragi++){
-      _dec->dc_qis[*coded_fragi]=qi0;
-    }
-  }
-  /*pp_level 1: Stop after updating DC quantization indices.*/
-  if(_dec->pp_level<=OC_PP_LEVEL_TRACKDCQI){
-    if(_dec->variances!=NULL){
-      _ogg_free(_dec->variances);
-      _dec->variances=NULL;
-      _ogg_free(_dec->pp_frame_data);
-      _dec->pp_frame_data=NULL;
-    }
-    return 1;
-  }
-  if(_dec->variances==NULL||
-   _dec->pp_frame_has_chroma!=(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC)){
-    size_t frame_sz;
-    frame_sz=_dec->state.info.frame_width*_dec->state.info.frame_height;
-    if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
-      _dec->variances=(int *)_ogg_realloc(_dec->variances,
-       _dec->state.fplanes[0].nfrags*sizeof(_dec->variances[0]));
-      _dec->pp_frame_data=(unsigned char *)_ogg_realloc(
-       _dec->pp_frame_data,frame_sz*sizeof(_dec->pp_frame_data[0]));
-      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
-      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
-      _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width;
-      _dec->pp_frame_buf[0].data=_dec->pp_frame_data+
-       (1-_dec->pp_frame_buf[0].height)*_dec->pp_frame_buf[0].stride;
-    }
-    else{
-      size_t y_sz;
-      size_t c_sz;
-      int    c_w;
-      int    c_h;
-      _dec->variances=(int *)_ogg_realloc(_dec->variances,
-       _dec->state.nfrags*sizeof(_dec->variances[0]));
-      y_sz=frame_sz;
-      c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
-      c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
-      c_sz=c_w*c_h;
-      frame_sz+=c_sz<<1;
-      _dec->pp_frame_data=(unsigned char *)_ogg_realloc(
-       _dec->pp_frame_data,frame_sz*sizeof(_dec->pp_frame_data[0]));
-      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
-      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
-      _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width;
-      _dec->pp_frame_buf[0].data=_dec->pp_frame_data;
-      _dec->pp_frame_buf[1].width=c_w;
-      _dec->pp_frame_buf[1].height=c_h;
-      _dec->pp_frame_buf[1].stride=_dec->pp_frame_buf[1].width;
-      _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+y_sz;
-      _dec->pp_frame_buf[2].width=c_w;
-      _dec->pp_frame_buf[2].height=c_h;
-      _dec->pp_frame_buf[2].stride=_dec->pp_frame_buf[2].width;
-      _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+c_sz;
-      oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf);
-    }
-    _dec->pp_frame_has_chroma=(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC);
-  }
-  /*If we're not processing chroma, copy the reference frame's chroma planes.*/
-  if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
-    memcpy(_dec->pp_frame_buf+1,
-     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1,
-     sizeof(_dec->pp_frame_buf[1])*2);
-  }
-  return 0;
-}
-
-
-
-typedef struct{ /*Working state for the pipelined part of frame decoding.*/
-  int  ti[3][64]; /*Next token index, per plane and coefficient.*/
-  int  ebi[3][64]; /*Next extra bits index, per plane and coefficient.*/
-  int  eob_runs[3][64]; /*Remaining EOB run count, per plane and coefficient.*/
-  int  bounding_values[256]; /*Filled in by oc_state_loop_filter_init().*/
-  int *coded_fragis[3]; /*Next unprocessed coded fragment index, per plane.*/
-  int *uncoded_fragis[3]; /*Position in the uncoded fragment list, per plane.*/
-  int  fragy0[3]; /*First fragment row of the current MCU, per plane.*/
-  int  fragy_end[3]; /*End (exclusive) fragment row of the current MCU.*/
-  int  ncoded_fragis[3]; /*Coded fragments in the current MCU, per plane.*/
-  int  nuncoded_fragis[3]; /*Uncoded fragments in the current MCU, per plane.*/
-  int  pred_last[3][3]; /*Last DC predictor, per plane and reference frame.*/
-  int  mcu_nvfrags; /*Luma fragment rows per MCU (4 or 8).*/
-  int  loop_filter; /*Non-zero if oc_state_loop_filter_init() returned 0.*/
-  int  pp_level; /*Post-processing level saved when the frame was started.*/
-}oc_dec_pipeline_state;
-
-
-
-/*Initialize the main decoding pipeline for the current frame.
-  _dec:  The decoder context whose unpacked token state is read.
-  _pipe: The pipeline state to fill in.
-  As a side effect, oc_dec_postprocess_init() is called; if it returns
-   non-zero, the plane descriptors in _dec->pp_frame_buf are overwritten with
-   those of the current (OC_FRAME_SELF) reference frame.*/
-static void oc_dec_pipeline_init(oc_dec_ctx *_dec,
- oc_dec_pipeline_state *_pipe){
-  int *coded_fragi_end;
-  int *uncoded_fragi_end;
-  int  pli;
-  /*If chroma is sub-sampled in the vertical direction, we have to decode two
-     super block rows of Y' for each super block row of Cb and Cr.*/
-  _pipe->mcu_nvfrags=4<<!(_dec->state.info.pixel_fmt&2);
-  /*Initialize the token and extra bits indices for each plane and
-     coefficient.*/
-  memset(_pipe->ti[0],0,sizeof(_pipe->ti[0]));
-  memset(_pipe->ebi[0],0,sizeof(_pipe->ebi[0]));
-  for(pli=1;pli<3;pli++){
-    memcpy(_pipe->ti[pli],_dec->ti0[pli-1],sizeof(_pipe->ti[0]));
-    memcpy(_pipe->ebi[pli],_dec->ebi0[pli-1],sizeof(_pipe->ebi[0]));
-  }
-  /*Also copy over the initial EOB run counts.*/
-  memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
-  /*Set up per-plane pointers to the coded and uncoded fragments lists.
-    The coded list is walked upwards and the uncoded list downwards.*/
-  coded_fragi_end=_dec->state.coded_fragis;
-  uncoded_fragi_end=_dec->state.uncoded_fragis;
-  for(pli=0;pli<3;pli++){
-    _pipe->coded_fragis[pli]=coded_fragi_end;
-    _pipe->uncoded_fragis[pli]=uncoded_fragi_end;
-    coded_fragi_end+=_dec->state.ncoded_fragis[pli];
-    uncoded_fragi_end-=_dec->state.nuncoded_fragis[pli];
-  }
-  /*Set the previous DC predictor to 0 for all color planes and frame types.*/
-  memset(_pipe->pred_last,0,sizeof(_pipe->pred_last));
-  /*Initialize the bounding value array for the loop filter.
-    A zero return from oc_state_loop_filter_init() enables the filter.*/
-  _pipe->loop_filter=!oc_state_loop_filter_init(&_dec->state,
-   _pipe->bounding_values);
-  /*Initialize any buffers needed for post-processing.
-    We also save the current post-processing level, to guard against the user
-     changing it from a callback.*/
-  if(!oc_dec_postprocess_init(_dec))_pipe->pp_level=_dec->pp_level;
-  /*If we don't have enough information to post-process, disable it, regardless
-     of the user-requested level.*/
-  else{
-    _pipe->pp_level=OC_PP_LEVEL_DISABLED;
-    memcpy(_dec->pp_frame_buf,
-     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
-     sizeof(_dec->pp_frame_buf[0])*3);
-  }
-}
-
-/*Reverses DC prediction for one color plane of the current MCU (one or two
-   super block rows).
-  Every coded fragment has the value returned by oc_frag_pred_dc() added to its
-   DC value, and the result becomes the latest predictor for the reference
-   frame type of the fragment's macro block mode.
-  The coded and uncoded fragment counts for this plane of the MCU are stored in
-   _pipe as a side effect.*/
-static void oc_dec_dc_unpredict_mcu_plane(oc_dec_ctx *_dec,
- oc_dec_pipeline_state *_pipe,int _pli){
-  oc_fragment_plane *plane;
-  oc_fragment       *cur;
-  int               *last_dc;
-  int                row0;
-  int                row_end;
-  int                row;
-  int                col;
-  int                ncoded;
-  plane=_dec->state.fplanes+_pli;
-  last_dc=_pipe->pred_last[_pli];
-  /*The fragment rows covered by the current MCU in this plane.*/
-  row0=_pipe->fragy0[_pli];
-  row_end=_pipe->fragy_end[_pli];
-  cur=_dec->state.frags+plane->froffset+(row0*plane->nhfrags);
-  ncoded=0;
-  for(row=row0;row<row_end;row++){
-    for(col=0;col<plane->nhfrags;col++,cur++){
-      if(cur->coded){
-        cur->dc+=oc_frag_pred_dc(cur,plane,col,row,last_dc);
-        last_dc[OC_FRAME_FOR_MODE[cur->mbmode]]=cur->dc;
-        ncoded++;
-      }
-    }
-  }
-  _pipe->ncoded_fragis[_pli]=ncoded;
-  /*Whatever was not coded in these rows is what has to be copied later.*/
-  _pipe->nuncoded_fragis[_pli]=(row_end-row0)*plane->nhfrags-ncoded;
-}
-
-/*Reconstructs all coded fragments in a single MCU (one or two super block
-   rows).
-  This requires that each coded fragment have a proper macro block mode and
-   motion vector (if not in INTRA mode), and have its DC value decoded, with
-   the DC prediction process reversed, and the number of coded and uncoded
-   fragments in this plane of the MCU be counted.
-  The token lists for each color plane and coefficient should also be filled
-   in, along with initial token offsets, extra bits offsets, and EOB run
-   counts.
-  On return, the coded and uncoded fragment list positions in _pipe have been
-   advanced past the fragments of this MCU.*/
-static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
- oc_dec_pipeline_state *_pipe,int _pli){
-  /*Decode the AC coefficients.*/
-  int *ti;
-  int *ebi;
-  int *eob_runs;
-  int *coded_fragi;
-  int *coded_fragi_end;
-  ti=_pipe->ti[_pli];
-  ebi=_pipe->ebi[_pli];
-  eob_runs=_pipe->eob_runs[_pli];
-  coded_fragi_end=coded_fragi=_pipe->coded_fragis[_pli];
-  coded_fragi_end+=_pipe->ncoded_fragis[_pli];
-  for(;coded_fragi<coded_fragi_end;coded_fragi++){
-    oc_fragment    *frag;
-    oc_quant_table *iquants;
-    /*This array is made one bigger than necessary so that an invalid zero
-       run cannot cause a buffer overflow.
-      The inverse zig-zag mapping sends all out of range indices to the last
-       entry of this array, where they are ignored.*/
-    ogg_int16_t    dct_coeffs[128];
-    int            fragi;
-    int            zzi;
-    int            last_zzi;
-    fragi=*coded_fragi;
-    frag=_dec->state.frags+fragi;
-    for(zzi=0;zzi<64;){
-      int token;
-      int eb;
-      last_zzi=zzi;
-      if(eob_runs[zzi]){
-        /*This block ends inside an EOB run that is still active.*/
-        eob_runs[zzi]--;
-        break;
-      }
-      else{
-        int ebflag;
-        token=_dec->dct_tokens[zzi][ti[zzi]++];
-        /*Only consume an extra bits entry if this token has extra bits.*/
-        ebflag=OC_DCT_TOKEN_EXTRA_BITS[token]!=0;
-        eb=_dec->extra_bits[zzi][ebi[zzi]]&-ebflag;
-        ebi[zzi]+=ebflag;
-        if(token<OC_NDCT_EOB_TOKEN_MAX){
-          eob_runs[zzi]=-oc_dct_token_skip(token,eb);
-        }
-        else oc_dct_token_expand(token,eb,dct_coeffs,&zzi);
-      }
-    }
-    /*TODO: zzi should be exactly 64 here.
-      If it's not, we should report some kind of warning.*/
-    zzi=OC_MINI(zzi,64);
-    dct_coeffs[0]=(ogg_int16_t)frag->dc;
-    iquants=_dec->state.dequant_tables[frag->mbmode!=OC_MODE_INTRA][_pli];
-    /*last_zzi is always initialized.
-      If your compiler thinks otherwise, it is dumb.*/
-    oc_state_frag_recon(&_dec->state,frag,_pli,dct_coeffs,last_zzi,zzi,
-     iquants[_dec->state.qis[0]][0],iquants[frag->qi]);
-  }
-  _pipe->coded_fragis[_pli]=coded_fragi;
-  /*Right now the reconstructed MCU has only the coded blocks in it.*/
-  /*TODO: We make the decision here to always copy the uncoded blocks into it
-     from the reference frame.
-    We could also copy the coded blocks back over the reference frame, if we
-     wait for an additional MCU to be decoded, which might be faster if only a
-     small number of blocks are coded.
-    However, this introduces more latency, creating a larger cache footprint.
-    It's unknown which decision is better, but this one results in simpler
-     code, and the hard case (high bitrate, high resolution) is handled
-     correctly.*/
-  /*Copy the uncoded blocks from the previous reference frame.*/
-  _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
-  oc_state_frag_copy(&_dec->state,_pipe->uncoded_fragis[_pli],
-   _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli);
-}
-
-/*Filter a horizontal block edge.
-  For each of 8 columns, 10 vertically adjacent pixels are read from _src and
-   8 pixels are written to _dst; the edge under test lies between the 5th and
-   6th pixels read (r[4] and r[5]).
-  The smoothing filter is applied only when the summed absolute differences on
-   both sides are below _flimit and the step across the edge is below _qstep;
-   otherwise the middle 8 source pixels are copied through unchanged.
-  The two sums, each limited to 255 with OC_MINI, are accumulated into
-   *_variance0 and *_variance1 for every column.*/
-static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride,
- const unsigned char *_src,int _src_ystride,int _qstep,int _flimit,
- int *_variance0,int *_variance1){
-  unsigned char       *rdst;
-  const unsigned char *rsrc;
-  unsigned char       *cdst;
-  const unsigned char *csrc;
-  int                  r[10];
-  int                  sum0;
-  int                  sum1;
-  int                  bx;
-  int                  by;
-  rdst=_dst;
-  rsrc=_src;
-  for(bx=0;bx<8;bx++){
-    cdst=rdst;
-    csrc=rsrc;
-    for(by=0;by<10;by++){
-      r[by]=*csrc;
-      csrc+=_src_ystride;
-    }
-    sum0=sum1=0;
-    for(by=0;by<4;by++){
-      sum0+=abs(r[by+1]-r[by]);
-      sum1+=abs(r[by+5]-r[by+6]);
-    }
-    *_variance0+=OC_MINI(255,sum0);
-    *_variance1+=OC_MINI(255,sum1);
-    if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){
-      *cdst=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3);
-      cdst+=_dst_ystride;
-      *cdst=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3);
-      cdst+=_dst_ystride;
-      for(by=0;by<4;by++){
-        *cdst=(unsigned char)(r[by]+r[by+1]+r[by+2]+r[by+3]*2+
-         r[by+4]+r[by+5]+r[by+6]+4>>3);
-        cdst+=_dst_ystride;
-      }
-      *cdst=(unsigned char)(r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4>>3);
-      cdst+=_dst_ystride;
-      *cdst=(unsigned char)(r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4>>3);
-    }
-    else{
-      for(by=1;by<=8;by++){
-        *cdst=(unsigned char)r[by];
-        cdst+=_dst_ystride;
-      }
-    }
-    rdst++;
-    rsrc++;
-  }
-}
-
-/*Filter a vertical block edge.
-  This works in place: for each of 8 rows, 10 horizontally adjacent pixels are
-   read starting one pixel to the left of _dst, and 8 pixels are written
-   starting at _dst.
-  The edge under test lies between r[4] and r[5], and the filter/copy decision
-   uses the same _flimit and _qstep tests as the horizontal edge filter.
-  The two summed absolute differences, each limited to 255 with OC_MINI, are
-   accumulated into _variances[0] and _variances[1] for every row.*/
-static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride,
- int _qstep,int _flimit,int *_variances){
-  unsigned char       *rdst;
-  const unsigned char *rsrc;
-  unsigned char       *cdst;
-  int                  r[10];
-  int                  sum0;
-  int                  sum1;
-  int                  bx;
-  int                  by;
-  cdst=_dst;
-  for(by=0;by<8;by++){
-    rsrc=cdst-1;
-    rdst=cdst;
-    for(bx=0;bx<10;bx++)r[bx]=*rsrc++;
-    sum0=sum1=0;
-    for(bx=0;bx<4;bx++){
-      sum0+=abs(r[bx+1]-r[bx]);
-      sum1+=abs(r[bx+5]-r[bx+6]);
-    }
-    _variances[0]+=OC_MINI(255,sum0);
-    _variances[1]+=OC_MINI(255,sum1);
-    if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){
-      *rdst++=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3);
-      *rdst++=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3);
-      for(bx=0;bx<4;bx++){
-        *rdst++=(unsigned char)(r[bx]+r[bx+1]+r[bx+2]+r[bx+3]*2+
-         r[bx+4]+r[bx+5]+r[bx+6]+4>>3);
-      }
-      *rdst++=(unsigned char)(r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4>>3);
-      *rdst=(unsigned char)(r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4>>3);
-    }
-    else for(bx=1;bx<=8;bx++)*rdst++=(unsigned char)r[bx];
-    cdst+=_dst_ystride;
-  }
-}
-
-static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec,
- th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0,
- int _fragy_end){/*De-block fragment rows _fragy0 up to _fragy_end of plane
-   _pli, reading from _src and writing to _dst.
-  Per-fragment variances in _dec->variances are cleared and accumulated as a
-   side effect.*/
-  oc_fragment_plane   *fplane;
-  int                 *variance;
-  unsigned char       *dc_qi;
-  unsigned char       *dst;
-  const unsigned char *src;
-  int                  notstart;
-  int                  notdone;
-  int                  froffset;
-  int                  flimit;
-  int                  qstep;
-  int                  y_end;
-  int                  y;
-  int                  x;
-  _dst+=_pli;
-  _src+=_pli;
-  fplane=_dec->state.fplanes+_pli;
-  froffset=fplane->froffset+_fragy0*fplane->nhfrags;
-  variance=_dec->variances+froffset;
-  dc_qi=_dec->dc_qis+froffset;
-  notstart=_fragy0>0;
-  notdone=_fragy_end<fplane->nvfrags;
-  /*We want to clear an extra row of variances, except at the end.*/
-  memset(variance+(fplane->nhfrags&-notstart),0,
-   (_fragy_end+notdone-_fragy0-notstart)*fplane->nhfrags*sizeof(variance[0]));
-  /*Except for the first time, we want to point to the middle of the row.*/
-  y=(_fragy0<<3)+(notstart<<2);
-  dst=_dst->data+y*_dst->stride;
-  src=_src->data+y*_src->stride;
-  /*The top four pixel rows of the plane are copied without filtering.*/
-  for(;y<4;y++){
-    memcpy(dst,src,_dst->width*sizeof(dst[0]));
-    dst+=_dst->stride;
-    src+=_src->stride;
-  }
-  /*We also want to skip the last row in the frame for this loop.*/
-  y_end=_fragy_end-!notdone<<3;
-  for(;y<y_end;y+=8){
-    qstep=_dec->pp_dc_scale[*dc_qi];
-    flimit=(qstep*3)>>2;
-    oc_filter_hedge(dst,_dst->stride,src-_src->stride,_src->stride,
-     qstep,flimit,variance,variance+fplane->nhfrags);
-    variance++;
-    dc_qi++;
-    for(x=8;x<_dst->width;x+=8){
-      qstep=_dec->pp_dc_scale[*dc_qi];
-      flimit=(qstep*3)>>2;
-      oc_filter_hedge(dst+x,_dst->stride,src+x-_src->stride,_src->stride,
-       qstep,flimit,variance,variance+fplane->nhfrags);
-      oc_filter_vedge(dst+x-(_dst->stride<<2)-4,_dst->stride,
-       qstep,flimit,variance-1);
-      variance++;
-      dc_qi++;
-    }
-    dst+=_dst->stride<<3;
-    src+=_src->stride<<3;
-  }
-  /*And finally, handle the last row in the frame, if it's in the range.*/
-  if(!notdone){
-    for(;y<_dst->height;y++){
-      memcpy(dst,src,_dst->width*sizeof(dst[0]));
-      dst+=_dst->stride;
-      src+=_src->stride;
-    }
-    /*Filter the last row of vertical block edges.*/
-    dc_qi++;
-    for(x=8;x<_dst->width;x+=8){
-      qstep=_dec->pp_dc_scale[*dc_qi++];
-      flimit=(qstep*3)>>2;
-      oc_filter_vedge(dst+x-(_dst->stride<<3)-4,_dst->stride,
-       qstep,flimit,variance++);
-    }
-  }
-}
-
-static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
- int _dc_scale,int _sharp_mod,int _strong){/*De-ring one 8x8 block at _idata
-   in place.
-  _b is a bit mask of the plane edges the block touches, as built by the
-   caller (1: left, 2: right, 4: top, 8: bottom); pixels across a flagged edge
-   are not read, the block's own edge pixels are used instead.
-  _strong (0 or 1) selects the entry of MOD_MAX and MOD_SHIFT to use.*/
-  static const int     MOD_MAX[2]={24,32};
-  static const int     MOD_SHIFT[2]={1,0};
-  const unsigned char *psrc;
-  const unsigned char *src;
-  const unsigned char *nsrc;
-  unsigned char       *dst;
-  int                  vmod[72];
-  int                  hmod[72];
-  int                  mod_hi;
-  int                  by;
-  int                  bx;
-  mod_hi=OC_MINI(3*_dc_scale,MOD_MAX[_strong]);
-  dst=_idata;
-  src=dst;
-  psrc=src-(_ystride&-!(_b&4));
-  /*Weights between vertically adjacent pixels: 9 rows of 8.*/
-  for(by=0;by<9;by++){
-    for(bx=0;bx<8;bx++){
-      int mod;
-      mod=32+_dc_scale-(abs(src[bx]-psrc[bx])<<MOD_SHIFT[_strong]);
-      vmod[(by<<3)+bx]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
-    }
-    psrc=src;
-    src+=_ystride&-(!(_b&8)|by<7);
-  }
-  nsrc=dst;
-  psrc=dst-!(_b&1);
-  /*Weights between horizontally adjacent pixels: 9 columns of 8.*/
-  for(bx=0;bx<9;bx++){
-    src=nsrc;
-    for(by=0;by<8;by++){
-      int mod;
-      mod=32+_dc_scale-(abs(*src-*psrc)<<MOD_SHIFT[_strong]);
-      hmod[(bx<<3)+by]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
-      psrc+=_ystride;
-      src+=_ystride;
-    }
-    psrc=nsrc;
-    nsrc+=!(_b&2)|bx<7;
-  }
-  src=dst;
-  psrc=src-(_ystride&-!(_b&4));
-  nsrc=src+_ystride;
-  /*Blend each pixel with its four neighbors; the weights sum to 128, with 64
-     added for rounding before the shift by 7.*/
-  for(by=0;by<8;by++){
-    int a;
-    int b;
-    int w;
-    a=128;
-    b=64;
-    w=hmod[by];
-    a-=w;
-    b+=w**(src-!(_b&1));
-    w=vmod[(by<<3)];
-    a-=w;
-    b+=w*psrc[0];
-    w=vmod[(by+1<<3)];
-    a-=w;
-    b+=w*nsrc[0];
-    w=hmod[(1<<3)+by];
-    a-=w;
-    b+=w*src[1];
-    dst[0]=OC_CLAMP255(a*src[0]+b>>7);
-    for(bx=1;bx<7;bx++){
-      a=128;
-      b=64;
-      w=hmod[(bx<<3)+by];
-      a-=w;
-      b+=w*src[bx-1];
-      w=vmod[(by<<3)+bx];
-      a-=w;
-      b+=w*psrc[bx];
-      w=vmod[(by+1<<3)+bx];
-      a-=w;
-      b+=w*nsrc[bx];
-      w=hmod[(bx+1<<3)+by];
-      a-=w;
-      b+=w*src[bx+1];
-      dst[bx]=OC_CLAMP255(a*src[bx]+b>>7);
-    }
-    a=128;
-    b=64;
-    w=hmod[(7<<3)+by];
-    a-=w;
-    b+=w*src[6];
-    w=vmod[(by<<3)+7];
-    a-=w;
-    b+=w*psrc[7];
-    w=vmod[(by+1<<3)+7];
-    a-=w;
-    b+=w*nsrc[7];
-    w=hmod[(8<<3)+by];
-    a-=w;
-    b+=w*src[7+!(_b&2)];
-    dst[7]=OC_CLAMP255(a*src[7]+b>>7);
-    dst+=_ystride;
-    psrc=src;
-    src=nsrc;
-    nsrc+=_ystride&-(!(_b&8)|by<6);
-  }
-}
-
-#define OC_DERING_THRESH1 (384) /*Variance above which a weak pass is run.*/
-#define OC_DERING_THRESH2 (4*OC_DERING_THRESH1) /*Above: one strong pass.*/
-#define OC_DERING_THRESH3 (5*OC_DERING_THRESH1) /*Luma strong-mode threshold.*/
-#define OC_DERING_THRESH4 (10*OC_DERING_THRESH1) /*Chroma strong-mode and
-   neighbor-variance threshold.*/
-
-/*De-ring fragment rows _fragy0 up to (but not including) _fragy_end of one
-   plane, in place.
-  _dec:       The decoder context; supplies the per-fragment variances, the
-               fragment quantizer indices and the per-qi filter parameters.
-  _img:       The image buffer to filter; plane _pli of it is modified.
-  _pli:       The color plane to process.
-  _fragy0:    The first fragment row to process.
-  _fragy_end: The fragment row to stop at.
-  The filter strength for each block is chosen from its accumulated variance;
-   in strong mode a block whose neighbors are also high-variance (or any
-   chroma block) gets two additional passes.*/
-static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
- int _pli,int _fragy0,int _fragy_end){
-  th_img_plane      *iplane;
-  oc_fragment_plane *fplane;
-  oc_fragment       *frag;
-  int               *variance;
-  unsigned char     *idata;
-  int                nhfrags;
-  int                sthresh;
-  int                strong;
-  int                froffset;
-  int                y_end;
-  int                y;
-  int                x;
-  iplane=_img+_pli;
-  fplane=_dec->state.fplanes+_pli;
-  nhfrags=fplane->nhfrags;
-  froffset=fplane->froffset+_fragy0*nhfrags;
-  variance=_dec->variances+froffset;
-  frag=_dec->state.frags+froffset;
-  strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY);
-  sthresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3;
-  y=_fragy0<<3;
-  idata=iplane->data+y*iplane->stride;
-  y_end=_fragy_end<<3;
-  for(;y<y_end;y+=8){
-    for(x=0;x<iplane->width;x+=8){
-      int b;
-      int qi;
-      int var;
-      qi=frag->qi;
-      var=*variance;
-      /*Flag the plane edges this block touches: 1 left, 2 right, 4 top,
-         8 bottom.*/
-      b=(x<=0)|(x+8>=iplane->width)<<1|(y<=0)<<2|(y+8>=iplane->height)<<3;
-      if(strong&&var>sthresh){
-        oc_dering_block(idata+x,iplane->stride,b,
-         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
-        /*The variances are stored nhfrags to a row, so the blocks above and
-           below are nhfrags entries away (not nvfrags).*/
-        if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4||
-         !(b&2)&&variance[1]>OC_DERING_THRESH4||
-         !(b&4)&&*(variance-nhfrags)>OC_DERING_THRESH4||
-         !(b&8)&&variance[nhfrags]>OC_DERING_THRESH4){
-          oc_dering_block(idata+x,iplane->stride,b,
-           _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
-          oc_dering_block(idata+x,iplane->stride,b,
-           _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
-        }
-      }
-      else if(var>OC_DERING_THRESH2){
-        oc_dering_block(idata+x,iplane->stride,b,
-         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
-      }
-      else if(var>OC_DERING_THRESH1){
-        oc_dering_block(idata+x,iplane->stride,b,
-         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],0);
-      }
-      frag++;
-      variance++;
-    }
-    idata+=iplane->stride<<3;
-  }
-}
-
-
-
-/*Allocates and initializes a decoder context.
-  _info:  The stream information to decode with.
-  _setup: The setup information to decode with.
-  Return: The new decoder context, or NULL if either argument is NULL, the
-   allocation fails, or oc_dec_init() reports an error.
-  The caller releases the context with th_decode_free().*/
-th_dec_ctx *th_decode_alloc(const th_info *_info,
- const th_setup_info *_setup){
-  oc_dec_ctx *dec;
-  if(_info==NULL||_setup==NULL)return NULL;
-  dec=_ogg_malloc(sizeof(*dec));
-  /*Don't hand a failed allocation to oc_dec_init().*/
-  if(dec==NULL)return NULL;
-  if(oc_dec_init(dec,_info,_setup)<0){
-    _ogg_free(dec);
-    return NULL;
-  }
-  dec->state.curframe_num=0;
-  return dec;
-}
-
-/*Releases a decoder context: clears it with oc_dec_clear() and then frees the
-   context itself.
-  Passing NULL is allowed and does nothing.*/
-void th_decode_free(th_dec_ctx *_dec){
-  if(_dec==NULL)return;
-  oc_dec_clear(_dec);
-  _ogg_free(_dec);
-}
-
-/*Decoder control function.
-  _dec:    The decoder context.
-  _req:    One of the TH_DECCTL_* request codes handled below.
-  _buf:    The request-specific argument buffer.
-  _buf_sz: The size of _buf in bytes; it must match the type the request
-            expects.
-  Return: 0 on success, TH_EFAULT if _dec or _buf is NULL, TH_EINVAL if
-   _buf_sz or the supplied value is invalid, or TH_EIMPL for a request that
-   is not handled here.*/
-int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
- size_t _buf_sz){
-  switch(_req){
-    case TH_DECCTL_GET_PPLEVEL_MAX:{
-      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
-      if(_buf_sz!=sizeof(int))return TH_EINVAL;
-      (*(int *)_buf)=OC_PP_LEVEL_MAX;
-      return 0;
-    }break;
-    case TH_DECCTL_SET_PPLEVEL:{
-      int pp_level;
-      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
-      if(_buf_sz!=sizeof(int))return TH_EINVAL;
-      pp_level=*(int *)_buf;
-      if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL;
-      _dec->pp_level=pp_level;
-      return 0;
-    }break;
-    case TH_DECCTL_SET_GRANPOS:{
-      ogg_int64_t granpos;
-      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
-      if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL;
-      granpos=*(ogg_int64_t *)_buf;
-      if(granpos<0)return TH_EINVAL;
-      _dec->state.granpos=granpos;
-      /*The high bits hold the key frame number, the low
-         keyframe_granule_shift bits the offset from that key frame.*/
-      _dec->state.keyframe_num=
-       granpos>>_dec->state.info.keyframe_granule_shift;
-      /*Build the mask in 64 bits: 1<<31 would overflow a 32-bit int.*/
-      _dec->state.curframe_num=_dec->state.keyframe_num+
-       (granpos&
-       (((ogg_int64_t)1<<_dec->state.info.keyframe_granule_shift)-1));
-      return 0;
-    }break;
-    case TH_DECCTL_SET_STRIPE_CB:{
-      th_stripe_callback *cb;
-      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
-      if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL;
-      cb=(th_stripe_callback *)_buf;
-      _dec->stripe_cb.ctx=cb->ctx;
-      _dec->stripe_cb.stripe_decoded=cb->stripe_decoded;
-      return 0;
-    }break;
-    default:return TH_EIMPL;
-  }
-}
-
-int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
- ogg_int64_t *_granpos){/*Decodes one packet into the next reference frame.
-  Return: 0 on success, TH_DUPFRAME for an empty (dropped frame) packet,
-   TH_EFAULT if _dec or _op is NULL, or the negative value returned by
-   oc_dec_frame_header_unpack().
-  If _granpos is not NULL, it receives the granule position of the frame.*/
-  int ret;
-  if(_dec==NULL||_op==NULL)return TH_EFAULT;
-  /*A completely empty packet indicates a dropped frame and is treated exactly
-     like an inter frame with no coded blocks.
-    Only proceed if we have a non-empty packet.*/
-  if(_op->bytes!=0){
-    oc_dec_pipeline_state pipe;
-    th_ycbcr_buffer       stripe_buf;
-    int                   stripe_fragy;
-    int                   refi;
-    int                   pli;
-    int                   notstart;
-    int                   notdone;
-    theorapackB_readinit(&_dec->opb,_op->packet,_op->bytes);
-    ret=oc_dec_frame_header_unpack(_dec);
-    if(ret<0)return ret;
-    /*Select a free buffer to use for the reconstructed version of this
-       frame.*/
-    if(_dec->state.frame_type!=OC_INTRA_FRAME&&
-     (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
-     _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){
-      th_info *info;
-      size_t       yplane_sz;
-      size_t       cplane_sz;
-      int          yhstride;
-      int          yvstride;
-      int          chstride;
-      int          cvstride;
-      /*We're decoding an INTER frame, but have no initialized reference
-         buffers (i.e., decoding did not start on a key frame).
-        We initialize them to a solid gray here.*/
-      _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0;
-      _dec->state.ref_frame_idx[OC_FRAME_PREV]=0;
-      _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi=1;
-      info=&_dec->state.info;
-      yhstride=info->frame_width+2*OC_UMV_PADDING;
-      yvstride=info->frame_height+2*OC_UMV_PADDING;
-      chstride=yhstride>>!(info->pixel_fmt&1);
-      cvstride=yvstride>>!(info->pixel_fmt&2);
-      yplane_sz=(size_t)yhstride*yvstride;
-      cplane_sz=(size_t)chstride*cvstride;
-      memset(_dec->state.ref_frame_data,0x80,yplane_sz+2*cplane_sz);
-    }
-    else{
-      /*Pick the lowest buffer index not used as the golden or previous
-         reference.*/
-      for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
-       refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++);
-      _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
-    }
-    if(_dec->state.frame_type==OC_INTRA_FRAME){
-      oc_dec_mark_all_intra(_dec);
-      _dec->state.keyframe_num=_dec->state.curframe_num;
-    }else{
-      oc_dec_coded_flags_unpack(_dec);
-      oc_dec_mb_modes_unpack(_dec);
-      oc_dec_mv_unpack_and_frag_modes_fill(_dec);
-    }
-    oc_dec_block_qis_unpack(_dec);
-    oc_dec_residual_tokens_unpack(_dec);
-    /*Update granule position.
-      This must be done before the striped decode callbacks so that the
-       application knows what to do with the frame data.*/
-    _dec->state.granpos=
-     (_dec->state.keyframe_num<<_dec->state.info.keyframe_granule_shift)+
-     (_dec->state.curframe_num-_dec->state.keyframe_num);
-    _dec->state.curframe_num++;
-    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
-    /*All of the rest of the operations -- DC prediction reversal,
-       reconstructing coded fragments, copying uncoded fragments, loop
-       filtering, extending borders, and out-of-loop post-processing -- should
-       be pipelined.
-      I.e., DC prediction reversal, reconstruction, and uncoded fragment
-       copying are done for one or two super block rows, then loop filtering is
-       run as far as it can, then border copying, then post-processing.
-      For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super
-       block rows, and one chroma.
-      Otherwise, an MCU consists of one super block row from each plane.
-      Inside each MCU, we perform all of the steps on one color plane before
-       moving on to the next.
-      After reconstruction, the additional filtering stages introduce a delay
-       since they need some pixels from the next fragment row.
-      Thus the actual number of decoded rows available is slightly smaller for
-       the first MCU, and slightly larger for the last.
-
-      This entire process allows us to operate on the data while it is still in
-       cache, resulting in big performance improvements.
-      An application callback allows further application processing (blitting
-       to video memory, color conversion, etc.) to also use the data while it's
-       in cache.*/
-    oc_dec_pipeline_init(_dec,&pipe);
-    oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
-    notstart=0;
-    notdone=1;
-    for(stripe_fragy=notstart=0;notdone;stripe_fragy+=pipe.mcu_nvfrags){
-      int avail_fragy0;
-      int avail_fragy_end;
-      avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
-      notdone=stripe_fragy+pipe.mcu_nvfrags<avail_fragy_end;
-      for(pli=0;pli<3;pli++){
-        oc_fragment_plane *fplane;
-        int                frag_shift;
-        int                pp_offset;
-        int                sdelay;
-        int                edelay;
-        fplane=_dec->state.fplanes+pli;
-        /*Compute the first and last fragment row of the current MCU for this
-           plane.*/
-        frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
-        pipe.fragy0[pli]=stripe_fragy>>frag_shift;
-        pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
-         pipe.fragy0[pli]+(pipe.mcu_nvfrags>>frag_shift));
-        oc_dec_dc_unpredict_mcu_plane(_dec,&pipe,pli);
-        oc_dec_frags_recon_mcu_plane(_dec,&pipe,pli);
-        /*sdelay and edelay count how many fragment rows each later stage lags
-           behind at the start and end of the MCU.*/
-        sdelay=edelay=0;
-        if(pipe.loop_filter){
-          sdelay+=notstart;
-          edelay+=notdone;
-          oc_state_loop_filter_frag_rows(&_dec->state,pipe.bounding_values,
-           refi,pli,pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
-        }
-        /*To fill the borders, we have an additional two pixel delay, since a
-           fragment in the next row could filter its top edge, using two pixels
-           from a fragment in this row.
-          But there's no reason to delay a full fragment between the two.*/
-        oc_state_borders_fill_rows(&_dec->state,refi,pli,
-         (pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
-         (pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
-        /*Out-of-loop post-processing.*/
-        pp_offset=3*(pli!=0);
-        if(pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
-          /*Perform de-blocking in one plane.*/
-          sdelay+=notstart;
-          edelay+=notdone;
-          oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
-           _dec->state.ref_frame_bufs[refi],pli,
-           pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
-          if(pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
-            /*Perform de-ringing in one plane.*/
-            sdelay+=notstart;
-            edelay+=notdone;
-            oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
-             pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
-          }
-        }
-        /*If no post-processing is done, we still need to delay a row for the
-           loop filter, thanks to the strange filtering order VP3 chose.*/
-        else if(pipe.loop_filter){
-          sdelay+=notstart;
-          edelay+=notdone;
-        }
-        /*Compute the intersection of the available rows in all planes.
-          If chroma is sub-sampled, the effect of each of its delays is
-           doubled, but luma might have more post-processing filters enabled
-           than chroma, so we don't know up front which one is the limiting
-           factor.*/
-        avail_fragy0=OC_MINI(avail_fragy0,pipe.fragy0[pli]-sdelay<<frag_shift);
-        avail_fragy_end=OC_MINI(avail_fragy_end,
-         pipe.fragy_end[pli]-edelay<<frag_shift);
-      }
-      if(_dec->stripe_cb.stripe_decoded!=NULL){
-        /*Make the callback, ensuring we flip the sense of the "start" and
-           "end" of the available region upside down.*/
-        (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
-         _dec->state.fplanes[0].nvfrags-avail_fragy_end,
-         _dec->state.fplanes[0].nvfrags-avail_fragy0);
-      }
-      notstart=1;
-    }
-    /*Finish filling in the reference frame borders.*/
-    for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
-    /*Update the reference frame indices.*/
-    if(_dec->state.frame_type==OC_INTRA_FRAME){
-      /*The new frame becomes both the previous and gold reference frames.*/
-      _dec->state.ref_frame_idx[OC_FRAME_GOLD]=
-       _dec->state.ref_frame_idx[OC_FRAME_PREV]=
-       _dec->state.ref_frame_idx[OC_FRAME_SELF];
-    }
-    else{
-      /*Otherwise, just replace the previous reference frame.*/
-      _dec->state.ref_frame_idx[OC_FRAME_PREV]=
-       _dec->state.ref_frame_idx[OC_FRAME_SELF];
-    }
-#if defined(OC_DUMP_IMAGES)
-    /*Don't dump images for dropped frames.*/
-    oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
-#endif
-    return 0;
-  }
-  else{
-    /*Just update the granule position and return.*/
-    _dec->state.granpos=
-     (_dec->state.keyframe_num<<_dec->state.info.keyframe_granule_shift)+
-     (_dec->state.curframe_num-_dec->state.keyframe_num);
-    _dec->state.curframe_num++;
-    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
-    return TH_DUPFRAME;
-  }
-}
-
-/*Retrieves the buffer descriptors for the most recently decoded frame.
-  _dec:   The decoder context.
-  _ycbcr: Receives a vertically flipped copy of the decoder's output plane
-           descriptors (_dec->pp_frame_buf).
-  Return: 0 on success, or TH_EFAULT if _dec or _ycbcr is NULL, matching the
-   other th_decode_* entry points.*/
-int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){
-  if(_dec==NULL||_ycbcr==NULL)return TH_EFAULT;
-  oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);
-  return 0;
-}

+ 0 - 199
Engine/lib/libtheora/lib/dec/fragment.c

@@ -1,199 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: fragment.c 15469 2008-10-30 12:49:42Z tterribe $
-
- ********************************************************************/
-
-#include "../internal.h"
-
-void oc_frag_recon_intra(const oc_theora_state *_state,unsigned char *_dst,
- int _dst_ystride,const ogg_int16_t *_residue){
-  _state->opt_vtable.frag_recon_intra(_dst,_dst_ystride,_residue);
-}
-
-void oc_frag_recon_intra_c(unsigned char *_dst,int _dst_ystride,
- const ogg_int16_t *_residue){
-  int i;
-  for(i=0;i<8;i++){
-    int j;
-    for(j=0;j<8;j++){
-      int res;
-      res=*_residue++;
-      _dst[j]=OC_CLAMP255(res+128);
-    }
-    _dst+=_dst_ystride;
-  }
-}
-
-void oc_frag_recon_inter(const oc_theora_state *_state,unsigned char *_dst,
- int _dst_ystride,const unsigned char *_src,int _src_ystride,
- const ogg_int16_t *_residue){
-  _state->opt_vtable.frag_recon_inter(_dst,_dst_ystride,_src,_src_ystride,
-   _residue);
-}
-
-void oc_frag_recon_inter_c(unsigned char *_dst,int _dst_ystride,
- const unsigned char *_src,int _src_ystride,const ogg_int16_t *_residue){
-  int i;
-  for(i=0;i<8;i++){
-    int j;
-    for(j=0;j<8;j++){
-      int res;
-      res=*_residue++;
-      _dst[j]=OC_CLAMP255(res+_src[j]);
-    }
-    _dst+=_dst_ystride;
-    _src+=_src_ystride;
-  }
-}
-
-void oc_frag_recon_inter2(const oc_theora_state *_state,unsigned char *_dst,
- int _dst_ystride,const unsigned char *_src1,int _src1_ystride,
- const unsigned char *_src2,int _src2_ystride,const ogg_int16_t *_residue){
-  _state->opt_vtable.frag_recon_inter2(_dst,_dst_ystride,_src1,_src1_ystride,
-   _src2,_src2_ystride,_residue);
-}
-
-void oc_frag_recon_inter2_c(unsigned char *_dst,int _dst_ystride,
- const unsigned char *_src1,int _src1_ystride,const unsigned char *_src2,
- int _src2_ystride,const ogg_int16_t *_residue){
-  int i;
-  for(i=0;i<8;i++){
-    int j;
-    for(j=0;j<8;j++){
-      int res;
-      res=*_residue++;
-      _dst[j]=OC_CLAMP255(res+((int)_src1[j]+_src2[j]>>1));
-    }
-    _dst+=_dst_ystride;
-    _src1+=_src1_ystride;
-    _src2+=_src2_ystride;
-  }
-}
-
-/*Computes the predicted DC value for the given fragment.
-  This requires that the fully decoded DC values be available for the left,
-   upper-left, upper, and upper-right fragments (if they exist).
-  _frag:      The fragment to predict the DC value for.
-  _fplane:    The fragment plane the fragment belongs to.
-  _x:         The x-coordinate of the fragment.
-  _y:         The y-coordinate of the fragment.
-  _pred_last: The last fully-decoded DC value for each predictor frame
-               (OC_FRAME_GOLD, OC_FRAME_PREV and OC_FRAME_SELF).
-              This should be initialized to 0's for the first fragment in each
-               color plane.
-  Return: The predicted DC value for this fragment.*/
-int oc_frag_pred_dc(const oc_fragment *_frag,
- const oc_fragment_plane *_fplane,int _x,int _y,int _pred_last[3]){
-  static const int PRED_SCALE[16][4]={
-    /*0*/
-    {0,0,0,0},
-    /*OC_PL*/
-    {1,0,0,0},
-    /*OC_PUL*/
-    {1,0,0,0},
-    /*OC_PL|OC_PUL*/
-    {1,0,0,0},
-    /*OC_PU*/
-    {1,0,0,0},
-    /*OC_PL|OC_PU*/
-    {1,1,0,0},
-    /*OC_PUL|OC_PU*/
-    {0,1,0,0},
-    /*OC_PL|OC_PUL|OC_PU*/
-    {29,-26,29,0},
-    /*OC_PUR*/
-    {1,0,0,0},
-    /*OC_PL|OC_PUR*/
-    {75,53,0,0},
-    /*OC_PUL|OC_PUR*/
-    {1,1,0,0},
-    /*OC_PL|OC_PUL|OC_PUR*/
-    {75,0,53,0},
-    /*OC_PU|OC_PUR*/
-    {1,0,0,0},
-    /*OC_PL|OC_PU|OC_PUR*/
-    {75,0,53,0},
-    /*OC_PUL|OC_PU|OC_PUR*/
-    {3,10,3,0},
-    /*OC_PL|OC_PUL|OC_PU|OC_PUR*/
-    {29,-26,29,0}
-  };
-  static const int PRED_SHIFT[16]={0,0,0,0,0,1,0,5,0,7,1,7,0,7,4,5};
-  static const int PRED_RMASK[16]={0,0,0,0,0,1,0,31,0,127,1,127,0,127,15,31};
-  static const int BC_MASK[8]={
-    /*No boundary condition.*/
-    OC_PL|OC_PUL|OC_PU|OC_PUR,
-    /*Left column.*/
-    OC_PU|OC_PUR,
-    /*Top row.*/
-    OC_PL,
-    /*Top row, left column.*/
-    0,
-    /*Right column.*/
-    OC_PL|OC_PUL|OC_PU,
-    /*Right and left column.*/
-    OC_PU,
-    /*Top row, right column.*/
-    OC_PL,
-    /*Top row, right and left column.*/
-    0
-  };
-  /*Predictor fragments, left, up-left, up, up-right.*/
-  const oc_fragment *predfr[4];
-  /*The frame used for prediction for this fragment.*/
-  int                pred_frame;
-  /*The boundary condition flags.*/
-  int                bc;
-  /*DC predictor values: left, up-left, up, up-right, missing values
-     skipped.*/
-  int                p[4];
-  /*Predictor count.*/
-  int                np;
-  /*Which predictor constants to use.*/
-  int                pflags;
-  /*The predicted DC value.*/
-  int                ret;
-  int                i;
-  pred_frame=OC_FRAME_FOR_MODE[_frag->mbmode];
-  bc=(_x==0)+((_y==0)<<1)+((_x+1==_fplane->nhfrags)<<2);
-  predfr[0]=_frag-1;
-  predfr[1]=_frag-_fplane->nhfrags-1;
-  predfr[2]=predfr[1]+1;
-  predfr[3]=predfr[2]+1;
-  np=0;
-  pflags=0;
-  for(i=0;i<4;i++){
-    int pflag;
-    pflag=1<<i;
-    if((BC_MASK[bc]&pflag)&&predfr[i]->coded&&
-     OC_FRAME_FOR_MODE[predfr[i]->mbmode]==pred_frame){
-      p[np++]=predfr[i]->dc;
-      pflags|=pflag;
-    }
-  }
-  if(pflags==0)return _pred_last[pred_frame];
-  else{
-    ret=PRED_SCALE[pflags][0]*p[0];
-    /*LOOP VECTORIZES.*/
-    for(i=1;i<np;i++)ret+=PRED_SCALE[pflags][i]*p[i];
-    ret=OC_DIV_POW2(ret,PRED_SHIFT[pflags],PRED_RMASK[pflags]);
-  }
-  if((pflags&(OC_PL|OC_PUL|OC_PU))==(OC_PL|OC_PUL|OC_PU)){
-    if(abs(ret-p[2])>128)ret=p[2];
-    else if(abs(ret-p[0])>128)ret=p[0];
-    else if(abs(ret-p[1])>128)ret=p[1];
-  }
-  return ret;
-}

+ 0 - 325
Engine/lib/libtheora/lib/dec/huffdec.c

@@ -1,325 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: huffdec.c 15431 2008-10-21 05:04:02Z giles $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include <ogg/ogg.h>
-#include "huffdec.h"
-#include "decint.h"
-
-
-/*The ANSI offsetof macro is broken on some platforms (e.g., older DECs).*/
-#define _ogg_offsetof(_type,_field)\
- ((size_t)((char *)&((_type *)0)->_field-(char *)0))
-
-/*These two functions are really part of the bitpack.c module, but
-  they are only used here. Declaring local static versions so they
-  can be inlined saves considerable function call overhead.*/
-
-/*Read in bits without advancing the bitptr.
-  Here we assume 0<=_bits&&_bits<=32.*/
-static int theorapackB_look(oggpack_buffer *_b,int _bits,long *_ret){
-  long ret;
-  long m;
-  long d;
-  m=32-_bits;
-  _bits+=_b->endbit;
-  d=_b->storage-_b->endbyte;
-  if(d<=4){
-    /*Not the main path.*/
-    if(d<=0){
-      *_ret=0L;
-      return -(_bits>d*8);
-    }
-    /*If we have some bits left, but not enough, return the ones we have.*/
-    if(d*8<_bits)_bits=d*8;
-  }
-  ret=_b->ptr[0]<<24+_b->endbit;
-  if(_bits>8){
-    ret|=_b->ptr[1]<<16+_b->endbit;
-    if(_bits>16){
-      ret|=_b->ptr[2]<<8+_b->endbit;
-      if(_bits>24){
-        ret|=_b->ptr[3]<<_b->endbit;
-        if(_bits>32)ret|=_b->ptr[4]>>8-_b->endbit;
-      }
-    }
-  }
-  *_ret=((ret&0xFFFFFFFF)>>(m>>1))>>(m+1>>1);
-  return 0;
-}
-
-/*advance the bitptr*/
-static void theorapackB_adv(oggpack_buffer *_b,int _bits){
-  _bits+=_b->endbit;
-  _b->ptr+=_bits>>3;
-  _b->endbyte+=_bits>>3;
-  _b->endbit=_bits&7;
-}
-
-
-/*The amount the log_2 of the size of a lookup table is allowed to grow
-   relative to the number of unique nodes it contains.
-  E.g., if OC_HUFF_SLUSH is 2, then at most 75% of the space in the tree is
-   wasted (each node will have an amortized cost of at most 20 bytes when using
-   4-byte pointers).
-  Larger numbers can decode tokens with fewer read operations, while smaller
-   numbers may save more space (requiring as little as 8 bytes amortized per
-   node, though there will be more nodes).
-  With a sample file:
-  32233473 read calls are required when no tree collapsing is done (100.0%).
-  19269269 read calls are required when OC_HUFF_SLUSH is 0 (59.8%).
-  11144969 read calls are required when OC_HUFF_SLUSH is 1 (34.6%).
-  10538563 read calls are required when OC_HUFF_SLUSH is 2 (32.7%).
-  10192578 read calls are required when OC_HUFF_SLUSH is 3 (31.6%).
-  Since a value of 1 gets us the vast majority of the speed-up with only a
-   small amount of wasted memory, this is what we use.*/
-#define OC_HUFF_SLUSH (1)
-
-
-/*Allocates a Huffman tree node that represents a subtree of depth _nbits.
-  _nbits: The depth of the subtree.
-          If this is 0, the node is a leaf node.
-          Otherwise 1<<_nbits pointers are allocated for children.
-  Return: The newly allocated and fully initialized Huffman tree node.*/
-static oc_huff_node *oc_huff_node_alloc(int _nbits){
-  oc_huff_node *ret;
-  size_t        size;
-  size=_ogg_offsetof(oc_huff_node,nodes);
-  if(_nbits>0)size+=sizeof(oc_huff_node *)*(1<<_nbits);
-  ret=_ogg_calloc(1,size);
-  ret->nbits=(unsigned char)_nbits;
-  return ret;
-}
-
-/*Frees a Huffman tree node allocated with oc_huff_node_alloc.
-  _node: The node to free.
-         This may be NULL.*/
-static void oc_huff_node_free(oc_huff_node *_node){
-  _ogg_free(_node);
-}
-
-/*Frees the memory used by a Huffman tree.
-  _node: The Huffman tree to free.
-         This may be NULL.*/
-static void oc_huff_tree_free(oc_huff_node *_node){
-  if(_node==NULL)return;
-  if(_node->nbits){
-    int nchildren;
-    int i;
-    int inext;
-    nchildren=1<<_node->nbits;
-    for(i=0;i<nchildren;i=inext){
-      inext=i+(_node->nodes[i]!=NULL?1<<_node->nbits-_node->nodes[i]->depth:1);
-      oc_huff_tree_free(_node->nodes[i]);
-    }
-  }
-  oc_huff_node_free(_node);
-}
-
-/*Unpacks a sub-tree from the given buffer.
-  _opb:    The buffer to unpack from.
-  _binode: The location to store a pointer to the sub-tree in.
-  _depth:  The current depth of the tree.
-           This is used to prevent infinite recursion.
-  Return: 0 on success, or a negative value on error.*/
-static int oc_huff_tree_unpack(oggpack_buffer *_opb,
- oc_huff_node **_binode,int _depth){
-  oc_huff_node *binode;
-  long          bits;
-  /*Prevent infinite recursion.*/
-  if(++_depth>32)return TH_EBADHEADER;
-  if(theorapackB_read1(_opb,&bits)<0)return TH_EBADHEADER;
-  /*Read an internal node:*/
-  if(!bits){
-    int ret;
-    binode=oc_huff_node_alloc(1);
-    binode->depth=(unsigned char)(_depth>1);
-    ret=oc_huff_tree_unpack(_opb,binode->nodes,_depth);
-    if(ret>=0)ret=oc_huff_tree_unpack(_opb,binode->nodes+1,_depth);
-    if(ret<0){
-      oc_huff_tree_free(binode);
-      *_binode=NULL;
-      return ret;
-    }
-  }
-  /*Read a leaf node:*/
-  else{
-    if(theorapackB_read(_opb,OC_NDCT_TOKEN_BITS,&bits)<0)return TH_EBADHEADER;
-    binode=oc_huff_node_alloc(0);
-    binode->depth=(unsigned char)(_depth>1);
-    binode->token=(unsigned char)bits;
-  }
-  *_binode=binode;
-  return 0;
-}
-
-/*Finds the depth of the shortest branch of the given sub-tree.
-  The tree must be binary.
-  _binode: The root of the given sub-tree.
-           _binode->nbits must be 0 or 1.
-  Return: The smallest depth of a leaf node in this sub-tree.
-          0 indicates this sub-tree is a leaf node.*/
-static int oc_huff_tree_mindepth(oc_huff_node *_binode){
-  int depth0;
-  int depth1;
-  if(_binode->nbits==0)return 0;
-  depth0=oc_huff_tree_mindepth(_binode->nodes[0]);
-  depth1=oc_huff_tree_mindepth(_binode->nodes[1]);
-  return OC_MINI(depth0,depth1)+1;
-}
-
-/*Finds the number of internal nodes at a given depth, plus the number of
-   leaves at that depth or shallower.
-  The tree must be binary.
-  _binode: The root of the given sub-tree.
-           _binode->nbits must be 0 or 1.
-  Return: The number of entries that would be contained in a jump table of the
-           given depth.*/
-static int oc_huff_tree_occupancy(oc_huff_node *_binode,int _depth){
-  if(_binode->nbits==0||_depth<=0)return 1;
-  else{
-    return oc_huff_tree_occupancy(_binode->nodes[0],_depth-1)+
-     oc_huff_tree_occupancy(_binode->nodes[1],_depth-1);
-  }
-}
-
-static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode);
-
-/*Fills the given nodes table with all the children in the sub-tree at the
-   given depth.
-  The nodes in the sub-tree with a depth less than that stored in the table
-   are freed.
-  The sub-tree must be binary and complete up until the given depth.
-  _nodes:  The nodes table to fill.
-  _binode: The root of the sub-tree to fill it with.
-           _binode->nbits must be 0 or 1.
-  _level:  The current level in the table.
-           0 indicates that the current node should be stored, regardless of
-            whether it is a leaf node or an internal node.
-  _depth:  The depth of the nodes to fill the table with, relative to their
-            parent.*/
-static void oc_huff_node_fill(oc_huff_node **_nodes,
- oc_huff_node *_binode,int _level,int _depth){
-  if(_level<=0||_binode->nbits==0){
-    int i;
-    _binode->depth=(unsigned char)(_depth-_level);
-    _nodes[0]=oc_huff_tree_collapse(_binode);
-    for(i=1;i<1<<_level;i++)_nodes[i]=_nodes[0];
-  }
-  else{
-    _level--;
-    oc_huff_node_fill(_nodes,_binode->nodes[0],_level,_depth);
-    oc_huff_node_fill(_nodes+(1<<_level),_binode->nodes[1],_level,_depth);
-    oc_huff_node_free(_binode);
-  }
-}
-
-/*Finds the largest complete sub-tree rooted at the current node and collapses
-   it into a single node.
-  This procedure is then applied recursively to all the children of that node.
-  _binode: The root of the sub-tree to collapse.
-           _binode->nbits must be 0 or 1.
-  Return: The new root of the collapsed sub-tree.*/
-static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode){
-  oc_huff_node *root;
-  int           mindepth;
-  int           depth;
-  int           loccupancy;
-  int           occupancy;
-  depth=mindepth=oc_huff_tree_mindepth(_binode);
-  occupancy=1<<mindepth;
-  do{
-    loccupancy=occupancy;
-    occupancy=oc_huff_tree_occupancy(_binode,++depth);
-  }
-  while(occupancy>loccupancy&&occupancy>=1<<OC_MAXI(depth-OC_HUFF_SLUSH,0));
-  depth--;
-  if(depth<=1)return _binode;
-  root=oc_huff_node_alloc(depth);
-  root->depth=_binode->depth;
-  oc_huff_node_fill(root->nodes,_binode,depth,depth);
-  return root;
-}
-
-/*Makes a copy of the given Huffman tree.
-  _node: The Huffman tree to copy.
-  Return: The copy of the Huffman tree.*/
-static oc_huff_node *oc_huff_tree_copy(const oc_huff_node *_node){
-  oc_huff_node *ret;
-  ret=oc_huff_node_alloc(_node->nbits);
-  ret->depth=_node->depth;
-  if(_node->nbits){
-    int nchildren;
-    int i;
-    int inext;
-    nchildren=1<<_node->nbits;
-    for(i=0;i<nchildren;){
-      ret->nodes[i]=oc_huff_tree_copy(_node->nodes[i]);
-      inext=i+(1<<_node->nbits-ret->nodes[i]->depth);
-      while(++i<inext)ret->nodes[i]=ret->nodes[i-1];
-    }
-  }
-  else ret->token=_node->token;
-  return ret;
-}
-
-/*Unpacks a set of Huffman trees, and reduces them to a collapsed
-   representation.
-  _opb:   The buffer to unpack the trees from.
-  _nodes: The table to fill with the Huffman trees.
-  Return: 0 on success, or a negative value on error.*/
-int oc_huff_trees_unpack(oggpack_buffer *_opb,
- oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){
-  int i;
-  for(i=0;i<TH_NHUFFMAN_TABLES;i++){
-    int ret;
-    ret=oc_huff_tree_unpack(_opb,_nodes+i,0);
-    if(ret<0)return ret;
-    _nodes[i]=oc_huff_tree_collapse(_nodes[i]);
-  }
-  return 0;
-}
-
-/*Makes a copy of the given set of Huffman trees.
-  _dst: The array to store the copy in.
-  _src: The array of trees to copy.*/
-void oc_huff_trees_copy(oc_huff_node *_dst[TH_NHUFFMAN_TABLES],
- const oc_huff_node *const _src[TH_NHUFFMAN_TABLES]){
-  int i;
-  for(i=0;i<TH_NHUFFMAN_TABLES;i++)_dst[i]=oc_huff_tree_copy(_src[i]);
-}
-
-/*Frees the memory used by a set of Huffman trees.
-  _nodes: The array of trees to free.*/
-void oc_huff_trees_clear(oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){
-  int i;
-  for(i=0;i<TH_NHUFFMAN_TABLES;i++)oc_huff_tree_free(_nodes[i]);
-}
-
-/*Unpacks a single token using the given Huffman tree.
-  _opb:  The buffer to unpack the token from.
-  _node: The tree to unpack the token with.
-  Return: The token value.*/
-int oc_huff_token_decode(oggpack_buffer *_opb,const oc_huff_node *_node){
-  long bits;
-  while(_node->nbits!=0){
-    theorapackB_look(_opb,_node->nbits,&bits);
-    _node=_node->nodes[bits];
-    theorapackB_adv(_opb,_node->depth);
-  }
-  return _node->token;
-}

+ 0 - 26
Engine/lib/libtheora/lib/dec/idct.h

@@ -1,26 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: idct.h 15400 2008-10-15 12:10:58Z tterribe $
-
- ********************************************************************/
-
-/*Inverse DCT transforms.*/
-#include <ogg/ogg.h>
-#if !defined(_idct_H)
-# define _idct_H (1)
-
-void oc_idct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
-void oc_idct8x8_10_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
-
-#endif

+ 0 - 88
Engine/lib/libtheora/lib/dec/ocintrin.h

@@ -1,88 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: ocintrin.h 15400 2008-10-15 12:10:58Z tterribe $
-
- ********************************************************************/
-
-/*Some common macros for potential platform-specific optimization.*/
-#include <math.h>
-#if !defined(_ocintrin_H)
-# define _ocintrin_H (1)
-
-/*Some specific platforms may have optimized intrinsic or inline assembly
-   versions of these functions which can substantially improve performance.
-  We define macros for them to allow easy incorporation of these non-ANSI
-   features.*/
-
-/*Branchless, but not correct for differences larger than INT_MAX.
-static int oc_mini(int _a,int _b){
-  int ambsign;
-  ambsign=_a-_b>>sizeof(int)*8-1;
-  return (_a&~ambsign)+(_b&ambsign);
-}*/
-
-
-#define OC_MAXI(_a,_b)      ((_a)<(_b)?(_b):(_a))
-#define OC_MINI(_a,_b)      ((_a)>(_b)?(_b):(_a))
-/*Clamps an integer into the given range.
-  If _a>_c, then the lower bound _a is respected over the upper bound _c (this
-   behavior is required to meet our documented API behavior).
-  _a: The lower bound.
-  _b: The value to clamp.
-  _c: The upper bound.*/
-#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c)))
-#define OC_CLAMP255(_x)     ((unsigned char)((((_x)<0)-1)&((_x)|-((_x)>255))))
-/*Divides an integer by a power of two, truncating towards 0.
-  _dividend: The integer to divide.
-  _shift:    The non-negative power of two to divide by.
-  _rmask:    (1<<_shift)-1*/
-#define OC_DIV_POW2(_dividend,_shift,_rmask)\
-  ((_dividend)+(((_dividend)>>sizeof(_dividend)*8-1)&(_rmask))>>(_shift))
-/*Divides _x by 65536, truncating towards 0.*/
-#define OC_DIV2_16(_x) OC_DIV_POW2(_x,16,0xFFFF)
-/*Divides _x by 2, truncating towards 0.*/
-#define OC_DIV2(_x) OC_DIV_POW2(_x,1,0x1)
-/*Divides _x by 8, truncating towards 0.*/
-#define OC_DIV8(_x) OC_DIV_POW2(_x,3,0x7)
-/*Divides _x by 16, truncating towards 0.*/
-#define OC_DIV16(_x) OC_DIV_POW2(_x,4,0xF)
-/*Right shifts _dividend by _shift, adding _rval, and subtracting one for
-   negative dividends first.
-  When _rval is (1<<_shift-1), this is equivalent to division with rounding
-   ties towards positive infinity.*/
-#define OC_DIV_ROUND_POW2(_dividend,_shift,_rval)\
-  ((_dividend)+((_dividend)>>sizeof(_dividend)*8-1)+(_rval)>>(_shift))
-/*Swaps two integers _a and _b if _a>_b.*/
-#define OC_SORT2I(_a,_b)\
-  if((_a)>(_b)){\
-    int t__;\
-    t__=(_a);\
-    (_a)=(_b);\
-    (_b)=t__;\
-  }
-
-
-
-/*All of these macros should expect floats as arguments.*/
-#define OC_MAXF(_a,_b)      ((_a)<(_b)?(_b):(_a))
-#define OC_MINF(_a,_b)      ((_a)>(_b)?(_b):(_a))
-#define OC_CLAMPF(_a,_b,_c) (OC_MINF(_a,OC_MAXF(_b,_c)))
-#define OC_FABSF(_f)        ((float)fabs(_f))
-#define OC_SQRTF(_f)        ((float)sqrt(_f))
-#define OC_POWF(_b,_e)      ((float)pow(_b,_e))
-#define OC_LOGF(_f)         ((float)log(_f))
-#define OC_IFLOORF(_f)      ((int)floor(_f))
-#define OC_ICEILF(_f)       ((int)ceil(_f))
-
-#endif

+ 0 - 122
Engine/lib/libtheora/lib/dec/quant.c

@@ -1,122 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: quant.c 15400 2008-10-15 12:10:58Z tterribe $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include <string.h>
-#include <ogg/ogg.h>
-#include "quant.h"
-#include "decint.h"
-
-static const unsigned OC_DC_QUANT_MIN[2]={4<<2,8<<2};
-static const unsigned OC_AC_QUANT_MIN[2]={2<<2,4<<2};
-
-/*Initializes the dequantization tables from a set of quantizer info.
-  Currently the dequantizer (and elsewhere enquantizer) tables are expected to
-   be initialized as pointing to the storage reserved for them in the
-   oc_theora_state (resp. oc_enc_ctx) structure.
-  If some tables are duplicates of others, the pointers will be adjusted to
-   point to a single copy of the tables, but the storage for them will not be
-   freed.
-  If you're concerned about the memory footprint, the obvious thing to do is
-   to move the storage out of its fixed place in the structures and allocate
-   it on demand.
-  However, a much, much better option is to only store the quantization
-   matrices being used for the current frame, and to recalculate these as the
-   qi values change between frames (this is what VP3 did).*/
-void oc_dequant_tables_init(oc_quant_table *_dequant[2][3],
- int _pp_dc_scale[64],const th_quant_info *_qinfo){
-  /*coding mode: intra or inter.*/
-  int          qti;
-  /*Y', C_b, C_r*/
-  int          pli;
-  for(qti=0;qti<2;qti++){
-    for(pli=0;pli<3;pli++){
-      oc_quant_tables stage;
-      /*Quality index.*/
-      int qi;
-      /*Range iterator.*/
-      int qri;
-      for(qi=0,qri=0; qri<=_qinfo->qi_ranges[qti][pli].nranges; qri++){
-        th_quant_base base;
-        ogg_uint32_t  q;
-        int           qi_start;
-        int           qi_end;
-        int           ci;
-        memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri],
-         sizeof(base));
-        qi_start=qi;
-        if(qri==_qinfo->qi_ranges[qti][pli].nranges)qi_end=qi+1;
-        else qi_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri];
-        /*Iterate over quality indices in this range.*/
-        for(;;){
-          ogg_uint32_t qfac;
-          /*In the original VP3.2 code, the rounding offset and the size of the
-             dead zone around 0 were controlled by a "sharpness" parameter.
-            The size of our dead zone is now controlled by the per-coefficient
-             quality thresholds returned by our HVS module.
-            We round down from a more accurate value when the quality of the
-             reconstruction does not fall below our threshold and it saves bits.
-            Hence, all of that VP3.2 code is gone from here, and the remaining
-             floating point code has been implemented as equivalent integer code
-             with exact precision.*/
-          qfac=(ogg_uint32_t)_qinfo->dc_scale[qi]*base[0];
-          /*For postprocessing, not dequantization.*/
-          if(_pp_dc_scale!=NULL)_pp_dc_scale[qi]=(int)(qfac/160);
-          /*Scale the DC coefficient from the proper table.*/
-          q=(qfac/100)<<2;
-          q=OC_CLAMPI(OC_DC_QUANT_MIN[qti],q,OC_QUANT_MAX);
-          stage[qi][0]=(ogg_uint16_t)q;
-          /*Now scale AC coefficients from the proper table.*/
-          for(ci=1;ci<64;ci++){
-            q=((ogg_uint32_t)_qinfo->ac_scale[qi]*base[ci]/100)<<2;
-            q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX);
-            stage[qi][ci]=(ogg_uint16_t)q;
-          }
-          if(++qi>=qi_end)break;
-          /*Interpolate the next base matrix.*/
-          for(ci=0;ci<64;ci++){
-            base[ci]=(unsigned char)(
-             (2*((qi_end-qi)*_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+
-             (qi-qi_start)*_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci])
-             +_qinfo->qi_ranges[qti][pli].sizes[qri])/
-             (2*_qinfo->qi_ranges[qti][pli].sizes[qri]));
-          }
-        }
-      }
-      /*Staging matrices complete; commit to memory only if this isn't a
-         duplicate of a preceding plane.
-        This simple check helps us improve cache coherency later.*/
-      {
-        int dupe;
-        int qtj;
-        int plj;
-        dupe=0;
-        for(qtj=0;qtj<=qti;qtj++){
-          for(plj=0;plj<(qtj<qti?3:pli);plj++){
-            if(!memcmp(stage,_dequant[qtj][plj],sizeof(stage))){
-              dupe=1;
-              break;
-            }
-          }
-          if(dupe)break;
-        }
-        if(dupe)_dequant[qti][pli]=_dequant[qtj][plj];
-        else memcpy(_dequant[qti][pli],stage,sizeof(stage));
-      }
-    }
-  }
-}

+ 0 - 653
Engine/lib/libtheora/lib/dec/x86/mmxstate.c

@@ -1,653 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: mmxstate.c 15400 2008-10-15 12:10:58Z tterribe $
-
- ********************************************************************/
-
-/*MMX acceleration of complete fragment reconstruction algorithm.
-  Originally written by Rudolf Marek.*/
-#include "x86int.h"
-#include "../../internal.h"
-#include <stddef.h>
-
-#if defined(USE_ASM)
-
-static const __attribute__((aligned(8),used)) int OC_FZIG_ZAGMMX[64]={
-   0, 8, 1, 2, 9,16,24,17,
-  10, 3,32,11,18,25, 4,12,
-   5,26,19,40,33,34,41,48,
-  27, 6,13,20,28,21,14, 7,
-  56,49,42,35,43,50,57,36,
-  15,22,29,30,23,44,37,58,
-  51,59,38,45,52,31,60,53,
-  46,39,47,54,61,62,55,63
-};
-
-
-
-void oc_state_frag_recon_mmx(oc_theora_state *_state,oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
- ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]){
-  ogg_int16_t  __attribute__((aligned(8))) res_buf[64];
-  int dst_framei;
-  int dst_ystride;
-  int zzi;
-  /*_last_zzi is subtly different from an actual count of the number of
-     coefficients we decoded for this block.
-    It contains the value of zzi BEFORE the final token in the block was
-     decoded.
-    In most cases this is an EOB token (the continuation of an EOB run from a
-     previous block counts), and so this is the same as the coefficient count.
-    However, in the case that the last token was NOT an EOB token, but filled
-     the block up with exactly 64 coefficients, _last_zzi will be less than 64.
-    Provided the last token was not a pure zero run, the minimum value it can
-     be is 46, and so that doesn't affect any of the cases in this routine.
-    However, if the last token WAS a pure zero run of length 63, then _last_zzi
-     will be 1 while the number of coefficients decoded is 64.
-    Thus, we will trigger the following special case, where the real
-     coefficient count would not.
-    Note also that a zero run of length 64 will give _last_zzi a value of 0,
-     but we still process the DC coefficient, which might have a non-zero value
-     due to DC prediction.
-    Although convoluted, this is arguably the correct behavior: it allows us to
-     dequantize fewer coefficients and use a smaller transform when the block
-     ends with a long zero run instead of a normal EOB token.
-    It could be smarter... multiple separate zero runs at the end of a block
-     will fool it, but an encoder that generates these really deserves what it
-     gets.
-    Needless to say we inherited this approach from VP3.*/
-  /*Special case only having a DC component.*/
-  if(_last_zzi<2){
-    ogg_uint16_t p;
-    /*Why is the iquant product rounded in this case and no others?
-      Who knows.*/
-    p=(ogg_int16_t)((ogg_int32_t)_frag->dc*_dc_iquant+15>>5);
-    /*Fill res_buf with p.*/
-    __asm__ __volatile__(
-      /*mm0=0000 0000 0000 AAAA*/
-      "movd %[p],%%mm0\n\t"
-      /*mm1=0000 0000 0000 AAAA*/
-      "movd %[p],%%mm1\n\t"
-      /*mm0=0000 0000 AAAA 0000*/
-      "pslld $16,%%mm0\n\t"
-      /*mm0=0000 0000 AAAA AAAA*/
-      "por %%mm1,%%mm0\n\t"
-      /*mm0=AAAA AAAA AAAA AAAA*/
-      "punpcklwd %%mm0,%%mm0\n\t"
-      "movq %%mm0,(%[res_buf])\n\t"
-      "movq %%mm0,8(%[res_buf])\n\t"
-      "movq %%mm0,16(%[res_buf])\n\t"
-      "movq %%mm0,24(%[res_buf])\n\t"
-      "movq %%mm0,32(%[res_buf])\n\t"
-      "movq %%mm0,40(%[res_buf])\n\t"
-      "movq %%mm0,48(%[res_buf])\n\t"
-      "movq %%mm0,56(%[res_buf])\n\t"
-      "movq %%mm0,64(%[res_buf])\n\t"
-      "movq %%mm0,72(%[res_buf])\n\t"
-      "movq %%mm0,80(%[res_buf])\n\t"
-      "movq %%mm0,88(%[res_buf])\n\t"
-      "movq %%mm0,96(%[res_buf])\n\t"
-      "movq %%mm0,104(%[res_buf])\n\t"
-      "movq %%mm0,112(%[res_buf])\n\t"
-      "movq %%mm0,120(%[res_buf])\n\t"
-      :
-      :[res_buf]"r"(res_buf),[p]"r"((unsigned)p)
-      :"memory"
-    );
-  }
-  else{
-    /*Then, fill in the remainder of the coefficients with 0's, and perform
-       the iDCT.*/
-    /*First zero the buffer.*/
-    /*On K7, etc., this could be replaced with movntq and sfence.*/
-    __asm__ __volatile__(
-      "pxor %%mm0,%%mm0\n\t"
-      "movq %%mm0,(%[res_buf])\n\t"
-      "movq %%mm0,8(%[res_buf])\n\t"
-      "movq %%mm0,16(%[res_buf])\n\t"
-      "movq %%mm0,24(%[res_buf])\n\t"
-      "movq %%mm0,32(%[res_buf])\n\t"
-      "movq %%mm0,40(%[res_buf])\n\t"
-      "movq %%mm0,48(%[res_buf])\n\t"
-      "movq %%mm0,56(%[res_buf])\n\t"
-      "movq %%mm0,64(%[res_buf])\n\t"
-      "movq %%mm0,72(%[res_buf])\n\t"
-      "movq %%mm0,80(%[res_buf])\n\t"
-      "movq %%mm0,88(%[res_buf])\n\t"
-      "movq %%mm0,96(%[res_buf])\n\t"
-      "movq %%mm0,104(%[res_buf])\n\t"
-      "movq %%mm0,112(%[res_buf])\n\t"
-      "movq %%mm0,120(%[res_buf])\n\t"
-      :
-      :[res_buf]"r"(res_buf)
-      :"memory"
-    );
-    res_buf[0]=(ogg_int16_t)((ogg_int32_t)_frag->dc*_dc_iquant);
-    /*This is planned to be rewritten in MMX.*/
-    for(zzi=1;zzi<_ncoefs;zzi++){
-      int ci;
-      ci=OC_FZIG_ZAG[zzi];
-      res_buf[OC_FZIG_ZAGMMX[zzi]]=(ogg_int16_t)((ogg_int32_t)_dct_coeffs[zzi]*
-       _ac_iquant[ci]);
-    }
-    if(_last_zzi<10)oc_idct8x8_10_mmx(res_buf);
-    else oc_idct8x8_mmx(res_buf);
-  }
-  /*Fill in the target buffer.*/
-  dst_framei=_state->ref_frame_idx[OC_FRAME_SELF];
-  dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
-  /*For now ystride values in all ref frames assumed to be equal.*/
-  if(_frag->mbmode==OC_MODE_INTRA){
-    oc_frag_recon_intra_mmx(_frag->buffer[dst_framei],dst_ystride,res_buf);
-  }
-  else{
-    int ref_framei;
-    int ref_ystride;
-    int mvoffsets[2];
-    ref_framei=_state->ref_frame_idx[OC_FRAME_FOR_MODE[_frag->mbmode]];
-    ref_ystride=_state->ref_frame_bufs[ref_framei][_pli].stride;
-    if(oc_state_get_mv_offsets(_state,mvoffsets,_frag->mv[0],_frag->mv[1],
-     ref_ystride,_pli)>1){
-      oc_frag_recon_inter2_mmx(_frag->buffer[dst_framei],dst_ystride,
-       _frag->buffer[ref_framei]+mvoffsets[0],ref_ystride,
-       _frag->buffer[ref_framei]+mvoffsets[1],ref_ystride,res_buf);
-    }
-    else{
-      oc_frag_recon_inter_mmx(_frag->buffer[dst_framei],dst_ystride,
-       _frag->buffer[ref_framei]+mvoffsets[0],ref_ystride,res_buf);
-    }
-  }
-  oc_restore_fpu(_state);
-}
-
-/*Copies the fragments specified by the lists of fragment indices from one
-   frame to another.
-  _fragis:    A pointer to a list of fragment indices.
-  _nfragis:   The number of fragment indices to copy.
-  _dst_frame: The reference frame to copy to.
-  _src_frame: The reference frame to copy from.
-  _pli:       The color plane the fragments lie in.*/
-void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
- int _nfragis,int _dst_frame,int _src_frame,int _pli){
-  const int *fragi;
-  const int *fragi_end;
-  int        dst_framei;
-  ptrdiff_t  dst_ystride;
-  int        src_framei;
-  ptrdiff_t  src_ystride;
-  dst_framei=_state->ref_frame_idx[_dst_frame];
-  src_framei=_state->ref_frame_idx[_src_frame];
-  dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
-  src_ystride=_state->ref_frame_bufs[src_framei][_pli].stride;
-  fragi_end=_fragis+_nfragis;
-  for(fragi=_fragis;fragi<fragi_end;fragi++){
-    oc_fragment   *frag;
-    unsigned char *dst;
-    unsigned char *src;
-    ptrdiff_t      s;
-    frag=_state->frags+*fragi;
-    dst=frag->buffer[dst_framei];
-    src=frag->buffer[src_framei];
-    __asm__ __volatile__(
-      /*src+0*src_ystride*/
-      "movq (%[src]),%%mm0\n\t"
-      /*s=src_ystride*3*/
-      "lea (%[src_ystride],%[src_ystride],2),%[s]\n\t"
-      /*src+1*src_ystride*/
-      "movq (%[src],%[src_ystride]),%%mm1\n\t"
-      /*src+2*src_ystride*/
-      "movq (%[src],%[src_ystride],2),%%mm2\n\t"
-      /*src+3*src_ystride*/
-      "movq (%[src],%[s]),%%mm3\n\t"
-      /*dst+0*dst_ystride*/
-      "movq %%mm0,(%[dst])\n\t"
-      /*s=dst_ystride*3*/
-      "lea (%[dst_ystride],%[dst_ystride],2),%[s]\n\t"
-      /*dst+1*dst_ystride*/
-      "movq %%mm1,(%[dst],%[dst_ystride])\n\t"
-      /*Pointer to next 4.*/
-      "lea (%[src],%[src_ystride],4),%[src]\n\t"
-      /*dst+2*dst_ystride*/
-      "movq %%mm2,(%[dst],%[dst_ystride],2)\n\t"
-      /*dst+3*dst_ystride*/
-      "movq %%mm3,(%[dst],%[s])\n\t"
-      /*Pointer to next 4.*/
-      "lea (%[dst],%[dst_ystride],4),%[dst]\n\t"
-      /*src+0*src_ystride*/
-      "movq (%[src]),%%mm0\n\t"
-      /*s=src_ystride*3*/
-      "lea (%[src_ystride],%[src_ystride],2),%[s]\n\t"
-      /*src+1*src_ystride*/
-      "movq (%[src],%[src_ystride]),%%mm1\n\t"
-      /*src+2*src_ystride*/
-      "movq (%[src],%[src_ystride],2),%%mm2\n\t"
-      /*src+3*src_ystride*/
-      "movq (%[src],%[s]),%%mm3\n\t"
-      /*dst+0*dst_ystride*/
-      "movq %%mm0,(%[dst])\n\t"
-      /*s=dst_ystride*3*/
-      "lea (%[dst_ystride],%[dst_ystride],2),%[s]\n\t"
-      /*dst+1*dst_ystride*/
-      "movq %%mm1,(%[dst],%[dst_ystride])\n\t"
-      /*dst+2*dst_ystride*/
-      "movq %%mm2,(%[dst],%[dst_ystride],2)\n\t"
-      /*dst+3*dst_ystride*/
-      "movq %%mm3,(%[dst],%[s])\n\t"
-      :[s]"=&r"(s)
-      :[dst]"r"(dst),[src]"r"(src),[dst_ystride]"r"(dst_ystride),
-       [src_ystride]"r"(src_ystride)
-      :"memory"
-    );
-  }
-  /*This needs to be removed when decode specific functions are implemented:*/
-  __asm__ __volatile__("emms\n\t");
-}
-
-static void loop_filter_v(unsigned char *_pix,int _ystride,
- const ogg_int16_t *_ll){
-  ptrdiff_t s;
-  _pix-=_ystride*2;
-  __asm__ __volatile__(
-    /*mm0=0*/
-    "pxor %%mm0,%%mm0\n\t"
-    /*s=_ystride*3*/
-    "lea (%[ystride],%[ystride],2),%[s]\n\t"
-    /*mm7=_pix[0...8]*/
-    "movq (%[pix]),%%mm7\n\t"
-    /*mm4=_pix[0...8+_ystride*3]*/
-    "movq (%[pix],%[s]),%%mm4\n\t"
-    /*mm6=_pix[0...8]*/
-    "movq %%mm7,%%mm6\n\t"
-    /*Expand unsigned _pix[0...3] to 16 bits.*/
-    "punpcklbw %%mm0,%%mm6\n\t"
-    "movq %%mm4,%%mm5\n\t"
-    /*Expand unsigned _pix[4...8] to 16 bits.*/
-    "punpckhbw %%mm0,%%mm7\n\t"
-    /*Expand other arrays too.*/
-    "punpcklbw %%mm0,%%mm4\n\t"
-    "punpckhbw %%mm0,%%mm5\n\t"
-    /*mm7:mm6=_p[0...8]-_p[0...8+_ystride*3]:*/
-    "psubw %%mm4,%%mm6\n\t"
-    "psubw %%mm5,%%mm7\n\t"
-    /*mm5=mm4=_pix[0...8+_ystride]*/
-    "movq (%[pix],%[ystride]),%%mm4\n\t"
-    /*mm1=mm3=mm2=_pix[0...8+_ystride*2]*/
-    "movq (%[pix],%[ystride],2),%%mm2\n\t"
-    "movq %%mm4,%%mm5\n\t"
-    "movq %%mm2,%%mm3\n\t"
-    "movq %%mm2,%%mm1\n\t"
-    /*Expand these arrays.*/
-    "punpckhbw %%mm0,%%mm5\n\t"
-    "punpcklbw %%mm0,%%mm4\n\t"
-    "punpckhbw %%mm0,%%mm3\n\t"
-    "punpcklbw %%mm0,%%mm2\n\t"
-    /*mm0=3 3 3 3
-      mm3:mm2=_pix[0...8+_ystride*2]-_pix[0...8+_ystride]*/
-    "pcmpeqw %%mm0,%%mm0\n\t"
-    "psubw %%mm5,%%mm3\n\t"
-    "psrlw $14,%%mm0\n\t"
-    "psubw %%mm4,%%mm2\n\t"
-    /*Scale by 3.*/
-    "pmullw %%mm0,%%mm3\n\t"
-    "pmullw %%mm0,%%mm2\n\t"
-    /*mm0=4 4 4 4
-      f=mm3:mm2==_pix[0...8]-_pix[0...8+_ystride*3]+
-       3*(_pix[0...8+_ystride*2]-_pix[0...8+_ystride])*/
-    "psrlw $1,%%mm0\n\t"
-    "paddw %%mm7,%%mm3\n\t"
-    "psllw $2,%%mm0\n\t"
-    "paddw %%mm6,%%mm2\n\t"
-    /*Add 4.*/
-    "paddw %%mm0,%%mm3\n\t"
-    "paddw %%mm0,%%mm2\n\t"
-    /*"Divide" by 8.*/
-    "psraw $3,%%mm3\n\t"
-    "psraw $3,%%mm2\n\t"
-    /*Now compute lflim of mm3:mm2 cf. Section 7.10 of the spec.*/
-    /*Free up mm5.*/
-    "packuswb %%mm5,%%mm4\n\t"
-    /*mm0=L L L L*/
-    "movq (%[ll]),%%mm0\n\t"
-    /*if(R_i<-2L||R_i>2L)R_i=0:*/
-    "movq %%mm2,%%mm5\n\t"
-    "pxor %%mm6,%%mm6\n\t"
-    "movq %%mm0,%%mm7\n\t"
-    "psubw %%mm0,%%mm6\n\t"
-    "psllw $1,%%mm7\n\t"
-    "psllw $1,%%mm6\n\t"
-    /*mm2==R_3 R_2 R_1 R_0*/
-    /*mm5==R_3 R_2 R_1 R_0*/
-    /*mm6==-2L -2L -2L -2L*/
-    /*mm7==2L 2L 2L 2L*/
-    "pcmpgtw %%mm2,%%mm7\n\t"
-    "pcmpgtw %%mm6,%%mm5\n\t"
-    "pand %%mm7,%%mm2\n\t"
-    "movq %%mm0,%%mm7\n\t"
-    "pand %%mm5,%%mm2\n\t"
-    "psllw $1,%%mm7\n\t"
-    "movq %%mm3,%%mm5\n\t"
-    /*mm3==R_7 R_6 R_5 R_4*/
-    /*mm5==R_7 R_6 R_5 R_4*/
-    /*mm6==-2L -2L -2L -2L*/
-    /*mm7==2L 2L 2L 2L*/
-    "pcmpgtw %%mm3,%%mm7\n\t"
-    "pcmpgtw %%mm6,%%mm5\n\t"
-    "pand %%mm7,%%mm3\n\t"
-    "movq %%mm0,%%mm7\n\t"
-    "pand %%mm5,%%mm3\n\t"
-    /*if(R_i<-L)R_i'=R_i+2L;
-      if(R_i>L)R_i'=R_i-2L;
-      if(R_i<-L||R_i>L)R_i=-R_i':*/
-    "psraw $1,%%mm6\n\t"
-    "movq %%mm2,%%mm5\n\t"
-    "psllw $1,%%mm7\n\t"
-    /*mm2==R_3 R_2 R_1 R_0*/
-    /*mm5==R_3 R_2 R_1 R_0*/
-    /*mm6==-L -L -L -L*/
-    /*mm0==L L L L*/
-    /*mm5=R_i>L?FF:00*/
-    "pcmpgtw %%mm0,%%mm5\n\t"
-    /*mm6=-L>R_i?FF:00*/
-    "pcmpgtw %%mm2,%%mm6\n\t"
-    /*mm7=R_i>L?2L:0*/
-    "pand %%mm5,%%mm7\n\t"
-    /*mm2=R_i>L?R_i-2L:R_i*/
-    "psubw %%mm7,%%mm2\n\t"
-    "movq %%mm0,%%mm7\n\t"
-    /*mm5=-L>R_i||R_i>L*/
-    "por %%mm6,%%mm5\n\t"
-    "psllw $1,%%mm7\n\t"
-    /*mm7=-L>R_i?2L:0*/
-    "pand %%mm6,%%mm7\n\t"
-    "pxor %%mm6,%%mm6\n\t"
-    /*mm2=-L>R_i?R_i+2L:R_i*/
-    "paddw %%mm7,%%mm2\n\t"
-    "psubw %%mm0,%%mm6\n\t"
-    /*mm5=-L>R_i||R_i>L?-R_i':0*/
-    "pand %%mm2,%%mm5\n\t"
-    "movq %%mm0,%%mm7\n\t"
-    /*mm2=-L>R_i||R_i>L?0:R_i*/
-    "psubw %%mm5,%%mm2\n\t"
-    "psllw $1,%%mm7\n\t"
-    /*mm2=-L>R_i||R_i>L?-R_i':R_i*/
-    "psubw %%mm5,%%mm2\n\t"
-    "movq %%mm3,%%mm5\n\t"
-    /*mm3==R_7 R_6 R_5 R_4*/
-    /*mm5==R_7 R_6 R_5 R_4*/
-    /*mm6==-L -L -L -L*/
-    /*mm0==L L L L*/
-    /*mm6=-L>R_i?FF:00*/
-    "pcmpgtw %%mm3,%%mm6\n\t"
-    /*mm5=R_i>L?FF:00*/
-    "pcmpgtw %%mm0,%%mm5\n\t"
-    /*mm7=R_i>L?2L:0*/
-    "pand %%mm5,%%mm7\n\t"
-    /*mm3=R_i>L?R_i-2L:R_i*/
-    "psubw %%mm7,%%mm3\n\t"
-    "psllw $1,%%mm0\n\t"
-    /*mm5=-L>R_i||R_i>L*/
-    "por %%mm6,%%mm5\n\t"
-    /*mm0=-L>R_i?2L:0*/
-    "pand %%mm6,%%mm0\n\t"
-    /*mm3=-L>R_i?R_i+2L:R_i*/
-    "paddw %%mm0,%%mm3\n\t"
-    /*mm5=-L>R_i||R_i>L?-R_i':0*/
-    "pand %%mm3,%%mm5\n\t"
-    /*mm3=-L>R_i||R_i>L?0:R_i*/
-    "psubw %%mm5,%%mm3\n\t"
-    /*mm3=-L>R_i||R_i>L?-R_i':R_i*/
-    "psubw %%mm5,%%mm3\n\t"
-    /*Unfortunately, there's no unsigned byte+signed byte with unsigned
-       saturation op code, so we have to promote things back 16 bits.*/
-    "pxor %%mm0,%%mm0\n\t"
-    "movq %%mm4,%%mm5\n\t"
-    "punpcklbw %%mm0,%%mm4\n\t"
-    "punpckhbw %%mm0,%%mm5\n\t"
-    "movq %%mm1,%%mm6\n\t"
-    "punpcklbw %%mm0,%%mm1\n\t"
-    "punpckhbw %%mm0,%%mm6\n\t"
-    /*_pix[0...8+_ystride]+=R_i*/
-    "paddw %%mm2,%%mm4\n\t"
-    "paddw %%mm3,%%mm5\n\t"
-    /*_pix[0...8+_ystride*2]-=R_i*/
-    "psubw %%mm2,%%mm1\n\t"
-    "psubw %%mm3,%%mm6\n\t"
-    "packuswb %%mm5,%%mm4\n\t"
-    "packuswb %%mm6,%%mm1\n\t"
-    /*Write it back out.*/
-    "movq %%mm4,(%[pix],%[ystride])\n\t"
-    "movq %%mm1,(%[pix],%[ystride],2)\n\t"
-    :[s]"=&r"(s)
-    :[pix]"r"(_pix),[ystride]"r"((ptrdiff_t)_ystride),[ll]"r"(_ll)
-    :"memory"
-  );
-}
-
-/*This code implements the bulk of loop_filter_h().
-  Data are striped p0 p1 p2 p3 ... p0 p1 p2 p3 ..., so in order to load all
-   four p0's to one register we must transpose the values in four mmx regs.
-  When half is done we repeat this for the rest.*/
-static void loop_filter_h4(unsigned char *_pix,ptrdiff_t _ystride,
- const ogg_int16_t *_ll){
-  ptrdiff_t s;
-  /*d doesn't technically need to be 64-bit on x86-64, but making it so will
-     help avoid partial register stalls.*/
-  ptrdiff_t d;
-  __asm__ __volatile__(
-    /*x x x x 3 2 1 0*/
-    "movd (%[pix]),%%mm0\n\t"
-    /*s=_ystride*3*/
-    "lea (%[ystride],%[ystride],2),%[s]\n\t"
-    /*x x x x 7 6 5 4*/
-    "movd (%[pix],%[ystride]),%%mm1\n\t"
-    /*x x x x B A 9 8*/
-    "movd (%[pix],%[ystride],2),%%mm2\n\t"
-    /*x x x x F E D C*/
-    "movd (%[pix],%[s]),%%mm3\n\t"
-    /*mm0=7 3 6 2 5 1 4 0*/
-    "punpcklbw %%mm1,%%mm0\n\t"
-    /*mm2=F B E A D 9 C 8*/
-    "punpcklbw %%mm3,%%mm2\n\t"
-    /*mm1=7 3 6 2 5 1 4 0*/
-    "movq %%mm0,%%mm1\n\t"
-    /*mm0=F B 7 3 E A 6 2*/
-    "punpckhwd %%mm2,%%mm0\n\t"
-    /*mm1=D 9 5 1 C 8 4 0*/
-    "punpcklwd %%mm2,%%mm1\n\t"
-    "pxor %%mm7,%%mm7\n\t"
-    /*mm5=D 9 5 1 C 8 4 0*/
-    "movq %%mm1,%%mm5\n\t"
-    /*mm1=x C x 8 x 4 x 0==pix[0]*/
-    "punpcklbw %%mm7,%%mm1\n\t"
-    /*mm5=x D x 9 x 5 x 1==pix[1]*/
-    "punpckhbw %%mm7,%%mm5\n\t"
-    /*mm3=F B 7 3 E A 6 2*/
-    "movq %%mm0,%%mm3\n\t"
-    /*mm0=x E x A x 6 x 2==pix[2]*/
-    "punpcklbw %%mm7,%%mm0\n\t"
-    /*mm3=x F x B x 7 x 3==pix[3]*/
-    "punpckhbw %%mm7,%%mm3\n\t"
-    /*mm1=mm1-mm3==pix[0]-pix[3]*/
-    "psubw %%mm3,%%mm1\n\t"
-    /*Save a copy of pix[2] for later.*/
-    "movq %%mm0,%%mm4\n\t"
-    /*mm2=3 3 3 3
-      mm0=mm0-mm5==pix[2]-pix[1]*/
-    "pcmpeqw %%mm2,%%mm2\n\t"
-    "psubw %%mm5,%%mm0\n\t"
-    "psrlw $14,%%mm2\n\t"
-    /*Scale by 3.*/
-    "pmullw %%mm2,%%mm0\n\t"
-    /*mm2=4 4 4 4
-      f=mm0==_pix[0]-_pix[3]+ 3*(_pix[2]-_pix[1])*/
-    "psrlw $1,%%mm2\n\t"
-    "paddw %%mm1,%%mm0\n\t"
-    "psllw $2,%%mm2\n\t"
-    /*Add 4.*/
-    "paddw %%mm2,%%mm0\n\t"
-    /*"Divide" by 8, producing the residuals R_i.*/
-    "psraw $3,%%mm0\n\t"
-    /*Now compute lflim of mm0 cf. Section 7.10 of the spec.*/
-    /*mm6=L L L L*/
-    "movq (%[ll]),%%mm6\n\t"
-    /*if(R_i<-2L||R_i>2L)R_i=0:*/
-    "movq %%mm0,%%mm1\n\t"
-    "pxor %%mm2,%%mm2\n\t"
-    "movq %%mm6,%%mm3\n\t"
-    "psubw %%mm6,%%mm2\n\t"
-    "psllw $1,%%mm3\n\t"
-    "psllw $1,%%mm2\n\t"
-    /*mm0==R_3 R_2 R_1 R_0*/
-    /*mm1==R_3 R_2 R_1 R_0*/
-    /*mm2==-2L -2L -2L -2L*/
-    /*mm3==2L 2L 2L 2L*/
-    "pcmpgtw %%mm0,%%mm3\n\t"
-    "pcmpgtw %%mm2,%%mm1\n\t"
-    "pand %%mm3,%%mm0\n\t"
-    "pand %%mm1,%%mm0\n\t"
-    /*if(R_i<-L)R_i'=R_i+2L;
-      if(R_i>L)R_i'=R_i-2L;
-      if(R_i<-L||R_i>L)R_i=-R_i':*/
-    "psraw $1,%%mm2\n\t"
-    "movq %%mm0,%%mm1\n\t"
-    "movq %%mm6,%%mm3\n\t"
-    /*mm0==R_3 R_2 R_1 R_0*/
-    /*mm1==R_3 R_2 R_1 R_0*/
-    /*mm2==-L -L -L -L*/
-    /*mm6==L L L L*/
-    /*mm2=-L>R_i?FF:00*/
-    "pcmpgtw %%mm0,%%mm2\n\t"
-    /*mm1=R_i>L?FF:00*/
-    "pcmpgtw %%mm6,%%mm1\n\t"
-    /*mm3=2L 2L 2L 2L*/
-    "psllw $1,%%mm3\n\t"
-    /*mm6=2L 2L 2L 2L*/
-    "psllw $1,%%mm6\n\t"
-    /*mm3=R_i>L?2L:0*/
-    "pand %%mm1,%%mm3\n\t"
-    /*mm6=-L>R_i?2L:0*/
-    "pand %%mm2,%%mm6\n\t"
-    /*mm0=R_i>L?R_i-2L:R_i*/
-    "psubw %%mm3,%%mm0\n\t"
-    /*mm1=-L>R_i||R_i>L*/
-    "por %%mm2,%%mm1\n\t"
-    /*mm0=-L>R_i?R_i+2L:R_i*/
-    "paddw %%mm6,%%mm0\n\t"
-    /*mm1=-L>R_i||R_i>L?R_i':0*/
-    "pand %%mm0,%%mm1\n\t"
-    /*mm0=-L>R_i||R_i>L?0:R_i*/
-    "psubw %%mm1,%%mm0\n\t"
-    /*mm0=-L>R_i||R_i>L?-R_i':R_i*/
-    "psubw %%mm1,%%mm0\n\t"
-    /*_pix[1]+=R_i;*/
-    "paddw %%mm0,%%mm5\n\t"
-    /*_pix[2]-=R_i;*/
-    "psubw %%mm0,%%mm4\n\t"
-    /*mm5=x x x x D 9 5 1*/
-    "packuswb %%mm7,%%mm5\n\t"
-    /*mm4=x x x x E A 6 2*/
-    "packuswb %%mm7,%%mm4\n\t"
-    /*mm5=E D A 9 6 5 2 1*/
-    "punpcklbw %%mm4,%%mm5\n\t"
-    /*d=6 5 2 1*/
-    "movd %%mm5,%[d]\n\t"
-    "movw %w[d],1(%[pix])\n\t"
-    /*Why is there such a big stall here?*/
-    "psrlq $32,%%mm5\n\t"
-    "shr $16,%[d]\n\t"
-    "movw %w[d],1(%[pix],%[ystride])\n\t"
-    /*d=E D A 9*/
-    "movd %%mm5,%[d]\n\t"
-    "movw %w[d],1(%[pix],%[ystride],2)\n\t"
-    "shr $16,%[d]\n\t"
-    "movw %w[d],1(%[pix],%[s])\n\t"
-    :[s]"=&r"(s),[d]"=&r"(d),
-     [pix]"+r"(_pix),[ystride]"+r"(_ystride),[ll]"+r"(_ll)
-    :
-    :"memory"
-  );
-}
-
-static void loop_filter_h(unsigned char *_pix,int _ystride,
- const ogg_int16_t *_ll){
-  _pix-=2;
-  loop_filter_h4(_pix,_ystride,_ll);
-  loop_filter_h4(_pix+(_ystride<<2),_ystride,_ll);
-}
-
-/*We copy the whole function because the MMX routines will be inlined 4 times,
-   and we can do just a single emms call at the end this way.
-  We also do not use the _bv lookup table, instead computing the values that
-   would lie in it on the fly.*/
-
-/*Apply the loop filter to a given set of fragment rows in the given plane.
-  The filter may be run on the bottom edge, affecting pixels in the next row of
-   fragments, so this row also needs to be available.
-  _bv:        The bounding values array.
-  _refi:      The index of the frame buffer to filter.
-  _pli:       The color plane to filter.
-  _fragy0:    The Y coordinate of the first fragment row to filter.
-  _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
-void oc_state_loop_filter_frag_rows_mmx(oc_theora_state *_state,int *_bv,
- int _refi,int _pli,int _fragy0,int _fragy_end){
-  ogg_int16_t __attribute__((aligned(8)))  ll[4];
-  th_img_plane                            *iplane;
-  oc_fragment_plane                       *fplane;
-  oc_fragment                             *frag_top;
-  oc_fragment                             *frag0;
-  oc_fragment                             *frag;
-  oc_fragment                             *frag_end;
-  oc_fragment                             *frag0_end;
-  oc_fragment                             *frag_bot;
-  ll[0]=ll[1]=ll[2]=ll[3]=
-   (ogg_int16_t)_state->loop_filter_limits[_state->qis[0]];
-  iplane=_state->ref_frame_bufs[_refi]+_pli;
-  fplane=_state->fplanes+_pli;
-  /*The following loops are constructed somewhat non-intuitively on purpose.
-    The main idea is: if a block boundary has at least one coded fragment on
-     it, the filter is applied to it.
-    However, the order that the filters are applied in matters, and VP3 chose
-     the somewhat strange ordering used below.*/
-  frag_top=_state->frags+fplane->froffset;
-  frag0=frag_top+_fragy0*fplane->nhfrags;
-  frag0_end=frag0+(_fragy_end-_fragy0)*fplane->nhfrags;
-  frag_bot=_state->frags+fplane->froffset+fplane->nfrags;
-  while(frag0<frag0_end){
-    frag=frag0;
-    frag_end=frag+fplane->nhfrags;
-    while(frag<frag_end){
-      if(frag->coded){
-        if(frag>frag0){
-          loop_filter_h(frag->buffer[_refi],iplane->stride,ll);
-        }
-        if(frag0>frag_top){
-          loop_filter_v(frag->buffer[_refi],iplane->stride,ll);
-        }
-        if(frag+1<frag_end&&!(frag+1)->coded){
-          loop_filter_h(frag->buffer[_refi]+8,iplane->stride,ll);
-        }
-        if(frag+fplane->nhfrags<frag_bot&&!(frag+fplane->nhfrags)->coded){
-          loop_filter_v((frag+fplane->nhfrags)->buffer[_refi],
-           iplane->stride,ll);
-        }
-      }
-      frag++;
-    }
-    frag0+=fplane->nhfrags;
-  }
-  /*This needs to be removed when decode specific functions are implemented:*/
-  __asm__ __volatile__("emms\n\t");
-}
-
-#endif

+ 0 - 42
Engine/lib/libtheora/lib/dec/x86/x86int.h

@@ -1,42 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: x86int.h 15400 2008-10-15 12:10:58Z tterribe $
-
- ********************************************************************/
-
-#if !defined(_x86_x86int_H)
-# define _x86_x86int_H (1)
-# include "../../internal.h"
-
-void oc_state_vtable_init_x86(oc_theora_state *_state);
-
-void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride,
- const ogg_int16_t *_residue);
-void oc_frag_recon_inter_mmx(unsigned char *_dst,int _dst_ystride,
- const unsigned char *_src,int _src_ystride,const ogg_int16_t *_residue);
-void oc_frag_recon_inter2_mmx(unsigned char *_dst,int _dst_ystride,
- const unsigned char *_src1,int _src1_ystride,const unsigned char *_src2,
- int _src2_ystride,const ogg_int16_t *_residue);
-void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
- int _nfragis,int _dst_frame,int _src_frame,int _pli);
-void oc_state_frag_recon_mmx(oc_theora_state *_state,oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
- ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]);
-void oc_restore_fpu_mmx(void);
-void oc_idct8x8_mmx(ogg_int16_t _y[64]);
-void oc_idct8x8_10_mmx(ogg_int16_t _y[64]);
-void oc_fill_idct_constants_mmx(void);
-void oc_state_loop_filter_frag_rows_mmx(oc_theora_state *_state,int *_bv,
- int _refi,int _pli,int _fragy0,int _fragy_end);
-#endif

+ 0 - 214
Engine/lib/libtheora/lib/dec/x86_vc/mmxfrag.c

@@ -1,214 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id:
-
- ********************************************************************/
-#include "../../internal.h"
-
-/* ------------------------------------------------------------------------
-  MMX reconstruction fragment routines for Visual Studio.
-  Tested with VS2005. Should compile for VS2003 and VC6 as well.
-
-  Initial implementation 2007 by Nils Pipenbrinck.
-  ---------------------------------------------------------------------*/
-
-#if defined(USE_ASM)
-
-void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride,
- const ogg_int16_t *_residue){
-  /* ---------------------------------------------------------------------
-  This function does the intra reconstruction step with 8 iterations
-  unrolled. The iteration for each instruction is noted by the #id in the
-  comments (in case you want to reconstruct it)
-  --------------------------------------------------------------------- */
-  _asm{
-    mov       edi, [_residue]     /* load residue ptr     */
-    mov       eax, 0x00800080     /* generate constant    */
-    mov       ebx, [_dst_ystride] /* load dst-stride      */
-    mov       edx, [_dst]         /* load dest pointer    */
-
-    /* unrolled loop begins here */
-
-    movd      mm0, eax            /* load constant        */
-    movq      mm1, [edi+ 8*0]     /* #1 load low residue  */
-    movq      mm2, [edi+ 8*1]     /* #1 load high residue */
-    punpckldq mm0, mm0            /* build constant       */
-    movq      mm3, [edi+ 8*2]     /* #2 load low residue  */
-    movq      mm4, [edi+ 8*3]     /* #2 load high residue */
-    movq      mm5, [edi+ 8*4]     /* #3 load low residue  */
-    movq      mm6, [edi+ 8*5]     /* #3 load high residue */
-    paddsw    mm1, mm0            /* #1 bias low  residue */
-    paddsw    mm2, mm0            /* #1 bias high residue */
-    packuswb  mm1, mm2            /* #1 pack to byte      */
-    paddsw    mm3, mm0            /* #2 bias low  residue */
-    paddsw    mm4, mm0            /* #2 bias high residue */
-    packuswb  mm3, mm4            /* #2 pack to byte      */
-    paddsw    mm5, mm0            /* #3 bias low  residue */
-    paddsw    mm6, mm0            /* #3 bias high residue */
-    packuswb  mm5, mm6            /* #3 pack to byte      */
-    movq      [edx], mm1          /* #1 write row         */
-    movq      [edx + ebx], mm3    /* #2 write row         */
-    movq      [edx + ebx*2], mm5  /* #3 write row         */
-    movq      mm1, [edi+ 8*6]     /* #4 load low residue  */
-    lea       ecx, [ebx + ebx*2]  /* make dst_ystride * 3 */
-    movq      mm2, [edi+ 8*7]     /* #4 load high residue */
-    movq      mm3, [edi+ 8*8]     /* #5 load low residue  */
-    lea       esi, [ebx*4 + ebx]  /* make dst_ystride * 5 */
-    movq      mm4, [edi+ 8*9]     /* #5 load high residue */
-    movq      mm5, [edi+ 8*10]    /* #6 load low residue  */
-    lea       eax, [ecx*2 + ebx]  /* make dst_ystride * 7 */
-    movq      mm6, [edi+ 8*11]    /* #6 load high residue */
-    paddsw    mm1, mm0            /* #4 bias low  residue */
-    paddsw    mm2, mm0            /* #4 bias high residue */
-    packuswb  mm1, mm2            /* #4 pack to byte      */
-    paddsw    mm3, mm0            /* #5 bias low  residue */
-    paddsw    mm4, mm0            /* #5 bias high residue */
-    packuswb  mm3, mm4            /* #5 pack to byte      */
-    paddsw    mm5, mm0            /* #6 bias low  residue */
-    paddsw    mm6, mm0            /* #6 bias high residue */
-    packuswb  mm5, mm6            /* #6 pack to byte      */
-    movq      [edx + ecx], mm1    /* #4 write row         */
-    movq      [edx + ebx*4], mm3  /* #5 write row         */
-    movq      [edx + esi], mm5    /* #6 write row         */
-    movq      mm1, [edi+ 8*12]    /* #7 load low residue  */
-    movq      mm2, [edi+ 8*13]    /* #7 load high residue */
-    movq      mm3, [edi+ 8*14]    /* #8 load low residue  */
-    movq      mm4, [edi+ 8*15]    /* #8 load high residue */
-    paddsw    mm1, mm0            /* #7 bias low  residue */
-    paddsw    mm2, mm0            /* #7 bias high residue */
-    packuswb  mm1, mm2            /* #7 pack to byte      */
-    paddsw    mm3, mm0            /* #8 bias low  residue */
-    paddsw    mm4, mm0            /* #8 bias high residue */
-    packuswb  mm3, mm4            /* #8 pack to byte      */
-    movq      [edx + ecx*2], mm1  /* #7 write row         */
-    movq      [edx + eax], mm3    /* #8 write row         */
-  }
-}
-
-
-
-void oc_frag_recon_inter_mmx (unsigned char *_dst, int _dst_ystride,
- const unsigned char *_src, int _src_ystride, const ogg_int16_t *_residue){
-  /* ---------------------------------------------------------------------
-  Inter reconstruction of one 8x8 block: for each of 8 rows, 8 source
-  bytes are zero-extended to 16 bits, the matching 8 residue words are
-  added with signed saturation (paddsw), and the sums are packed back to
-  bytes with unsigned saturation (packuswb) and stored to _dst.
-
-    _dst, _dst_ystride   destination pointer and its row stride in bytes
-    _src, _src_ystride   source pointer and its row stride in bytes
-    _residue             64 consecutive 16-bit residue values (128 bytes)
-
-  The loop runs 4 times and handles two rows per pass, interleaved to
-  hide some load latencies and break the dependency chains. The row a
-  given instruction belongs to is noted by the #id in its comment (in
-  case you want to reconstruct the straight-line version).
-
-  No emms is executed here, so the MMX state is still active on return.
-  --------------------------------------------------------------------- */
-  _asm{
-    pxor      mm0, mm0          /* generate constant 0 */
-    mov       esi, [_src]
-    mov       edi, [_residue]
-    mov       eax, [_src_ystride]
-    mov       edx, [_dst]
-    mov       ebx, [_dst_ystride]
-    mov       ecx, 4
-
-    align 16
-
-nextchunk:
-    movq      mm3, [esi]        /* #1 load source        */
-    movq      mm1, [edi+0]      /* #1 load residue low   */
-    movq      mm2, [edi+8]      /* #1 load residue high  */
-    movq      mm7, [esi+eax]    /* #2 load source        */
-    movq      mm4, mm3          /* #1 get copy of src    */
-    movq      mm5, [edi+16]     /* #2 load residue low   */
-    punpckhbw mm4, mm0          /* #1 expand high source */
-    movq      mm6, [edi+24]     /* #2 load residue high  */
-    punpcklbw mm3, mm0          /* #1 expand low  source */
-    paddsw    mm4, mm2          /* #1 add residue high   */
-    movq      mm2, mm7          /* #2 get copy of src    */
-    paddsw    mm3, mm1          /* #1 add residue low    */
-    punpckhbw mm2, mm0          /* #2 expand high source */
-    packuswb  mm3, mm4          /* #1 final row pixels   */
-    punpcklbw mm7, mm0          /* #2 expand low  source */
-    movq      [edx], mm3        /* #1 write row          */
-    paddsw    mm2, mm6          /* #2 add residue high   */
-    add       edi, 32           /* residue += 32 bytes   */
-    paddsw    mm7, mm5          /* #2 add residue low    */
-    sub       ecx, 1            /* update loop counter   */
-    packuswb  mm7, mm2          /* #2 final row          */
-    lea       esi, [esi+eax*2]  /* src += stride * 2     */
-    movq      [edx + ebx], mm7  /* #2 write row          */
-    lea       edx, [edx+ebx*2]  /* dst += stride * 2     */
-    jne       nextchunk
-  }
-}
-
-
-void oc_frag_recon_inter2_mmx(unsigned char *_dst,  int _dst_ystride,
- const unsigned char *_src1,  int _src1_ystride, const unsigned char *_src2,
- int _src2_ystride,const ogg_int16_t *_residue){
-  /* ---------------------------------------------------------------------
-  This function does the inter2 reconstruction step: for each of 8 rows
-  the two sources are averaged byte-wise, the 16-bit residue is added with
-  signed saturation and the result is packed to bytes with unsigned
-  saturation and stored to _dst.
-
-    _dst, _dst_ystride     destination pointer and its row stride in bytes
-    _src1, _src1_ystride   first source pointer and its row stride
-    _src2, _src2_ystride   second source pointer and its row stride
-    _residue               64 consecutive 16-bit residue values (128 bytes)
-
-  The building of the average is done with a bit-twiddling trick to avoid
-  excessive register copy work during byte to word conversion.
-
-              average = (a & b) + (((a ^ b) & 0xfe) >> 1);
-
-  (shown for a single byte; it's done with 8 of them at a time)
-  The 0xfe mask is needed because the shift is a single 64-bit psrlq:
-  clearing bit 0 of every byte first keeps it from being shifted into the
-  neighbouring byte.
-
-  Slightly faster than the obvious method using add and shift, but not
-  earthshaking improvement either.
-
-  If anyone comes up with a way that produces bit-identical outputs
-  using the pavgb instruction let me know and I'll do the 3dnow codepath.
-
-  dst is decremented by one stride before the loop because the loop adds
-  the stride before each store. No emms is executed here.
-  --------------------------------------------------------------------- */
- _asm{
-   mov        eax, 0xfefefefe
-   mov        esi, [_src1]
-   mov        edi, [_src2]
-   movd       mm1, eax
-   mov        ebx, [_residue]
-   mov        edx, [_dst]
-   mov        eax, [_dst_ystride]
-   punpckldq  mm1, mm1            /* replicate lsb32     */
-   mov        ecx, 8              /* init loop counter   */
-   pxor       mm0, mm0            /* constant zero       */
-   sub        edx, eax            /* dst -= dst_stride   */
-
-   align      16
-
-nextrow:
-   movq       mm2,  [esi]         /* load source1        */
-   movq       mm3,  [edi]         /* load source2        */
-   movq       mm5,  [ebx + 0]     /* load lower residue  */
-   movq       mm6,  [ebx + 8]     /* load higher residue */
-   add        esi,  _src1_ystride /* src1 += src1_stride */
-   add        edi,  _src2_ystride /* src2 += src2_stride */
-   movq       mm4,  mm2           /* get copy of source1 */
-   pand       mm2,  mm3           /* s1 & s2 (avg part)  */
-   pxor       mm3,  mm4           /* s1 ^ s2 (avg part)  */
-   add        ebx,  16            /* residue += 16 bytes */
-   pand       mm3,  mm1           /* mask out low bits   */
-   psrlq      mm3,  1             /* shift xor avg-part  */
-   paddd      mm3,  mm2           /* build final average */
-   add        edx,  eax           /* dst += dst_stride   */
-   movq       mm2,  mm3           /* get copy of average */
-   punpckhbw  mm3,  mm0           /* average high        */
-   punpcklbw  mm2,  mm0           /* average low         */
-   paddsw     mm3,  mm6           /* high + residue      */
-   paddsw     mm2,  mm5           /* low  + residue      */
-   sub        ecx,  1             /* update loop counter */
-   packuswb   mm2,  mm3           /* pack and saturate   */
-   movq       [edx], mm2          /* write row           */
-   jne        nextrow
- }
-}
-
-void oc_restore_fpu_mmx(void){
-  _asm { emms }  /* empty the MMX state so x87 floating point can be used again */
-}
-
-#endif

+ 0 - 1006
Engine/lib/libtheora/lib/dec/x86_vc/mmxidct.c

@@ -1,1006 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id:
-
- ********************************************************************/
-
-/* -------------------------------------------------------------------
-  MMX based IDCT for the theora codec.
-
-  Originally written by Rudolf Marek, based on code from On2's VP3.
-  Converted to Visual Studio inline assembly by Nils Pipenbrinck.
-
-  ---------------------------------------------------------------------*/
-#if defined(USE_ASM)
-
-#include <ogg/ogg.h>
-#include "../dct.h"
-#include "../idct.h"
-#include "x86int.h"
-
-/*A table of constants used by the MMX routines.
-  It holds 8 rows of 4 identical 16-bit words, so that each row can be used
-   directly as one 64-bit MMX memory operand; the array is 16-byte aligned.
-  Rows 0..6 (byte offsets 00H..30H) hold OC_C1S7..OC_C7S1 (presumably defined
-   in dct.h -- confirm), cast to unsigned 16 bits.
-  Row 7 (byte offset 38H) holds the value 8, which the IDCT routines below add
-   to each result right before shifting it right by 4.*/
-static const __declspec(align(16)) ogg_uint16_t
- OC_IDCT_CONSTS[(7+1)*4]={
-  (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7,
-  (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7,
-  (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6,
-  (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6,
-  (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5,
-  (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5,
-  (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4,
-  (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4,
-  (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3,
-  (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3,
-  (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2,
-  (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2,
-  (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1,
-  (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1,
-      8,    8,    8,    8
-};
-
-/*In-place MMX transform of the 8x8 block of 16-bit values at _y (all loads
-   and stores go to [edx+00H]..[edx+78H], i.e. 128 bytes); the multipliers
-   are read from OC_IDCT_CONSTS through eax.
-  The unrolled code consists of four passes:
-   1) the eight quadwords at 00H..38H are run through the add/subtract/multiply
-      network and written back interleaved by the punpck* sequence (which
-      looks like a 4x4 word transpose),
-   2) the same for the eight quadwords at 40H..78H,
-   3) the quadwords at 00H,10H,..,70H are run through the network again; each
-      result gets the bias at [eax+38H] (the row of 8s) added and is shifted
-      right arithmetically by 4 before being stored,
-   4) the same for the quadwords at 08H,18H,..,78H.
-  Only eax, edx and mm0..mm7 are used.
-  emms is left commented out at the end, so the MMX state is still active on
-   return.
-  NOTE(review): this body appears to be instruction-for-instruction the same
-   as oc_idct8x8_mmx() in this file, i.e. the "_10" variant does not seem to
-   take any shortcut here -- confirm against the GCC version.*/
-void oc_idct8x8_10_mmx(ogg_int16_t _y[64]){
-  _asm {
-    mov     edx, [_y]
-    mov     eax, offset OC_IDCT_CONSTS
-    movq    mm2, [edx + 30H]
-    movq    mm6, [eax + 10H]
-    movq    mm4, mm2
-    movq    mm7, [edx + 18H]
-    pmulhw  mm4, mm6
-    movq    mm1, [eax + 20H]
-    pmulhw  mm6, mm7
-    movq    mm5, mm1
-    pmulhw  mm1, mm2
-    movq    mm3, [edx + 10H]
-    pmulhw  mm5, mm7
-    movq    mm0, [eax]
-    paddw   mm4, mm2
-    paddw   mm6, mm7
-    paddw   mm2, mm1
-    movq    mm1, [edx + 38H]
-    paddw   mm7, mm5
-    movq    mm5, mm0
-    pmulhw  mm0, mm3
-    paddw   mm4, mm7
-    pmulhw  mm5, mm1
-    movq    mm7, [eax + 30H]
-    psubw   mm6, mm2
-    paddw   mm0, mm3
-    pmulhw  mm3, mm7
-    movq    mm2, [edx + 20H]
-    pmulhw  mm7, mm1
-    paddw   mm5, mm1
-    movq    mm1, mm2
-    pmulhw  mm2, [eax + 08H]
-    psubw   mm3, mm5
-    movq    mm5, [edx + 28H]
-    paddw   mm0, mm7
-    movq    mm7, mm5
-    psubw   mm0, mm4
-    pmulhw  mm5, [eax + 08H]
-    paddw   mm2, mm1
-    pmulhw  mm1, [eax + 28H]
-    paddw   mm4, mm4
-    paddw   mm4, mm0
-    psubw   mm3, mm6
-    paddw   mm5, mm7
-    paddw   mm6, mm6
-    pmulhw  mm7, [eax + 28H]
-    paddw   mm6, mm3
-    movq    [edx + 10H], mm4
-    psubw   mm1, mm5
-    movq    mm4, [eax + 18H]
-    movq    mm5, mm3
-    pmulhw  mm3, mm4
-    paddw   mm7, mm2
-    movq    [edx + 20H], mm6
-    movq    mm2, mm0
-    movq    mm6, [edx]
-    pmulhw  mm0, mm4
-    paddw   mm5, mm3
-    movq    mm3, [edx + 08H]
-    psubw   mm5, mm1
-    paddw   mm2, mm0
-    psubw   mm6, mm3
-    movq    mm0, mm6
-    pmulhw  mm6, mm4
-    paddw   mm3, mm3
-    paddw   mm1, mm1
-    paddw   mm3, mm0
-    paddw   mm1, mm5
-    pmulhw  mm4, mm3
-    paddw   mm6, mm0
-    psubw   mm6, mm2
-    paddw   mm2, mm2
-    movq    mm0, [edx + 10H]
-    paddw   mm2, mm6
-    paddw   mm4, mm3
-    psubw   mm2, mm1
-    movq    mm3, [edx + 20H]
-    psubw   mm4, mm7
-    paddw   mm1, mm1
-    paddw   mm7, mm7
-    paddw   mm1, mm2
-    paddw   mm7, mm4
-    psubw   mm4, mm3
-    paddw   mm3, mm3
-    psubw   mm6, mm5
-    paddw   mm5, mm5
-    paddw   mm3, mm4
-    paddw   mm5, mm6
-    psubw   mm7, mm0
-    paddw   mm0, mm0
-    movq    [edx + 10H], mm1
-    paddw   mm0, mm7
-    movq    mm1, mm4
-    punpcklwd mm4, mm5
-    movq    [edx], mm0
-    punpckhwd mm1, mm5
-    movq    mm0, mm6
-    punpcklwd mm6, mm7
-    movq    mm5, mm4
-    punpckldq mm4, mm6
-    punpckhdq mm5, mm6
-    movq    mm6, mm1
-    movq    [edx + 08H], mm4
-    punpckhwd mm0, mm7
-    movq    [edx + 18H], mm5
-    punpckhdq mm6, mm0
-    movq    mm4, [edx]
-    punpckldq mm1, mm0
-    movq    mm5, [edx + 10H]
-    movq    mm0, mm4
-    movq    [edx + 38H], mm6
-    punpcklwd mm0, mm5
-    movq    [edx + 28H], mm1
-    punpckhwd mm4, mm5
-    movq    mm5, mm2
-    punpcklwd mm2, mm3
-    movq    mm1, mm0
-    punpckldq mm0, mm2
-    punpckhdq mm1, mm2
-    movq    mm2, mm4
-    movq    [edx], mm0
-    punpckhwd mm5, mm3
-    movq    [edx + 10H], mm1
-    punpckhdq mm4, mm5
-    punpckldq mm2, mm5
-    movq    [edx + 30H], mm4
-    movq    [edx + 20H], mm2
-    movq    mm2, [edx + 70H]
-    movq    mm6, [eax + 10H]
-    movq    mm4, mm2
-    movq    mm7, [edx + 58H]
-    pmulhw  mm4, mm6
-    movq    mm1, [eax + 20H]
-    pmulhw  mm6, mm7
-    movq    mm5, mm1
-    pmulhw  mm1, mm2
-    movq    mm3, [edx + 50H]
-    pmulhw  mm5, mm7
-    movq    mm0, [eax]
-    paddw   mm4, mm2
-    paddw   mm6, mm7
-    paddw   mm2, mm1
-    movq    mm1, [edx + 78H]
-    paddw   mm7, mm5
-    movq    mm5, mm0
-    pmulhw  mm0, mm3
-    paddw   mm4, mm7
-    pmulhw  mm5, mm1
-    movq    mm7, [eax + 30H]
-    psubw   mm6, mm2
-    paddw   mm0, mm3
-    pmulhw  mm3, mm7
-    movq    mm2, [edx + 60H]
-    pmulhw  mm7, mm1
-    paddw   mm5, mm1
-    movq    mm1, mm2
-    pmulhw  mm2, [eax + 08H]
-    psubw   mm3, mm5
-    movq    mm5, [edx + 68H]
-    paddw   mm0, mm7
-    movq    mm7, mm5
-    psubw   mm0, mm4
-    pmulhw  mm5, [eax + 08H]
-    paddw   mm2, mm1
-    pmulhw  mm1, [eax + 28H]
-    paddw   mm4, mm4
-    paddw   mm4, mm0
-    psubw   mm3, mm6
-    paddw   mm5, mm7
-    paddw   mm6, mm6
-    pmulhw  mm7, [eax + 28H]
-    paddw   mm6, mm3
-    movq    [edx + 50H], mm4
-    psubw   mm1, mm5
-    movq    mm4, [eax + 18H]
-    movq    mm5, mm3
-    pmulhw  mm3, mm4
-    paddw   mm7, mm2
-    movq    [edx + 60H], mm6
-    movq    mm2, mm0
-    movq    mm6, [edx + 40H]
-    pmulhw  mm0, mm4
-    paddw   mm5, mm3
-    movq    mm3, [edx + 48H]
-    psubw   mm5, mm1
-    paddw   mm2, mm0
-    psubw   mm6, mm3
-    movq    mm0, mm6
-    pmulhw  mm6, mm4
-    paddw   mm3, mm3
-    paddw   mm1, mm1
-    paddw   mm3, mm0
-    paddw   mm1, mm5
-    pmulhw  mm4, mm3
-    paddw   mm6, mm0
-    psubw   mm6, mm2
-    paddw   mm2, mm2
-    movq    mm0, [edx + 50H]
-    paddw   mm2, mm6
-    paddw   mm4, mm3
-    psubw   mm2, mm1
-    movq    mm3, [edx + 60H]
-    psubw   mm4, mm7
-    paddw   mm1, mm1
-    paddw   mm7, mm7
-    paddw   mm1, mm2
-    paddw   mm7, mm4
-    psubw   mm4, mm3
-    paddw   mm3, mm3
-    psubw   mm6, mm5
-    paddw   mm5, mm5
-    paddw   mm3, mm4
-    paddw   mm5, mm6
-    psubw   mm7, mm0
-    paddw   mm0, mm0
-    movq    [edx + 50H], mm1
-    paddw   mm0, mm7
-    movq    mm1, mm4
-    punpcklwd mm4, mm5
-    movq    [edx + 40H], mm0
-    punpckhwd mm1, mm5
-    movq    mm0, mm6
-    punpcklwd mm6, mm7
-    movq    mm5, mm4
-    punpckldq mm4, mm6
-    punpckhdq mm5, mm6
-    movq    mm6, mm1
-    movq    [edx + 48H], mm4
-    punpckhwd mm0, mm7
-    movq    [edx + 58H], mm5
-    punpckhdq mm6, mm0
-    movq    mm4, [edx + 40H]
-    punpckldq mm1, mm0
-    movq    mm5, [edx + 50H]
-    movq    mm0, mm4
-    movq    [edx + 78H], mm6
-    punpcklwd mm0, mm5
-    movq    [edx + 68H], mm1
-    punpckhwd mm4, mm5
-    movq    mm5, mm2
-    punpcklwd mm2, mm3
-    movq    mm1, mm0
-    punpckldq mm0, mm2
-    punpckhdq mm1, mm2
-    movq    mm2, mm4
-    movq    [edx + 40H], mm0
-    punpckhwd mm5, mm3
-    movq    [edx + 50H], mm1
-    punpckhdq mm4, mm5
-    punpckldq mm2, mm5
-    movq    [edx + 70H], mm4
-    movq    [edx + 60H], mm2
-    movq    mm2, [edx + 30H]
-    movq    mm6, [eax + 10H]
-    movq    mm4, mm2
-    movq    mm7, [edx + 50H]
-    pmulhw  mm4, mm6
-    movq    mm1, [eax + 20H]
-    pmulhw  mm6, mm7
-    movq    mm5, mm1
-    pmulhw  mm1, mm2
-    movq    mm3, [edx + 10H]
-    pmulhw  mm5, mm7
-    movq    mm0, [eax]
-    paddw   mm4, mm2
-    paddw   mm6, mm7
-    paddw   mm2, mm1
-    movq    mm1, [edx + 70H]
-    paddw   mm7, mm5
-    movq    mm5, mm0
-    pmulhw  mm0, mm3
-    paddw   mm4, mm7
-    pmulhw  mm5, mm1
-    movq    mm7, [eax + 30H]
-    psubw   mm6, mm2
-    paddw   mm0, mm3
-    pmulhw  mm3, mm7
-    movq    mm2, [edx + 20H]
-    pmulhw  mm7, mm1
-    paddw   mm5, mm1
-    movq    mm1, mm2
-    pmulhw  mm2, [eax + 08H]
-    psubw   mm3, mm5
-    movq    mm5, [edx + 60H]
-    paddw   mm0, mm7
-    movq    mm7, mm5
-    psubw   mm0, mm4
-    pmulhw  mm5, [eax + 08H]
-    paddw   mm2, mm1
-    pmulhw  mm1, [eax + 28H]
-    paddw   mm4, mm4
-    paddw   mm4, mm0
-    psubw   mm3, mm6
-    paddw   mm5, mm7
-    paddw   mm6, mm6
-    pmulhw  mm7, [eax + 28H]
-    paddw   mm6, mm3
-    movq    [edx + 10H], mm4
-    psubw   mm1, mm5
-    movq    mm4, [eax + 18H]
-    movq    mm5, mm3
-    pmulhw  mm3, mm4
-    paddw   mm7, mm2
-    movq    [edx + 20H], mm6
-    movq    mm2, mm0
-    movq    mm6, [edx]
-    pmulhw  mm0, mm4
-    paddw   mm5, mm3
-    movq    mm3, [edx + 40H]
-    psubw   mm5, mm1
-    paddw   mm2, mm0
-    psubw   mm6, mm3
-    movq    mm0, mm6
-    pmulhw  mm6, mm4
-    paddw   mm3, mm3
-    paddw   mm1, mm1
-    paddw   mm3, mm0
-    paddw   mm1, mm5
-    pmulhw  mm4, mm3
-    paddw   mm6, mm0
-    psubw   mm6, mm2
-    paddw   mm2, mm2
-    movq    mm0, [edx + 10H]
-    paddw   mm2, mm6
-    paddw   mm4, mm3
-    psubw   mm2, mm1
-    paddw   mm2, [eax + 38H]
-    paddw   mm1, mm1
-    paddw   mm1, mm2
-    psraw   mm2, 4
-    psubw   mm4, mm7
-    psraw   mm1, 4
-    movq    mm3, [edx + 20H]
-    paddw   mm7, mm7
-    movq    [edx + 20H], mm2
-    paddw   mm7, mm4
-    movq    [edx + 10H], mm1
-    psubw   mm4, mm3
-    paddw   mm4, [eax + 38H]
-    paddw   mm3, mm3
-    paddw   mm3, mm4
-    psraw   mm4, 4
-    psubw   mm6, mm5
-    psraw   mm3, 4
-    paddw   mm6, [eax + 38H]
-    paddw   mm5, mm5
-    paddw   mm5, mm6
-    psraw   mm6, 4
-    movq    [edx + 40H], mm4
-    psraw   mm5, 4
-    movq    [edx + 30H], mm3
-    psubw   mm7, mm0
-    paddw   mm7, [eax + 38H]
-    paddw   mm0, mm0
-    paddw   mm0, mm7
-    psraw   mm7, 4
-    movq    [edx + 60H], mm6
-    psraw   mm0, 4
-    movq    [edx + 50H], mm5
-    movq    [edx + 70H], mm7
-    movq    [edx], mm0
-    movq    mm2, [edx + 38H]
-    movq    mm6, [eax + 10H]
-    movq    mm4, mm2
-    movq    mm7, [edx + 58H]
-    pmulhw  mm4, mm6
-    movq    mm1, [eax + 20H]
-    pmulhw  mm6, mm7
-    movq    mm5, mm1
-    pmulhw  mm1, mm2
-    movq    mm3, [edx + 18H]
-    pmulhw  mm5, mm7
-    movq    mm0, [eax]
-    paddw   mm4, mm2
-    paddw   mm6, mm7
-    paddw   mm2, mm1
-    movq    mm1, [edx + 78H]
-    paddw   mm7, mm5
-    movq    mm5, mm0
-    pmulhw  mm0, mm3
-    paddw   mm4, mm7
-    pmulhw  mm5, mm1
-    movq    mm7, [eax + 30H]
-    psubw   mm6, mm2
-    paddw   mm0, mm3
-    pmulhw  mm3, mm7
-    movq    mm2, [edx + 28H]
-    pmulhw  mm7, mm1
-    paddw   mm5, mm1
-    movq    mm1, mm2
-    pmulhw  mm2, [eax + 08H]
-    psubw   mm3, mm5
-    movq    mm5, [edx + 68H]
-    paddw   mm0, mm7
-    movq    mm7, mm5
-    psubw   mm0, mm4
-    pmulhw  mm5, [eax + 08H]
-    paddw   mm2, mm1
-    pmulhw  mm1, [eax + 28H]
-    paddw   mm4, mm4
-    paddw   mm4, mm0
-    psubw   mm3, mm6
-    paddw   mm5, mm7
-    paddw   mm6, mm6
-    pmulhw  mm7, [eax + 28H]
-    paddw   mm6, mm3
-    movq    [edx + 18H], mm4
-    psubw   mm1, mm5
-    movq    mm4, [eax + 18H]
-    movq    mm5, mm3
-    pmulhw  mm3, mm4
-    paddw   mm7, mm2
-    movq    [edx + 28H], mm6
-    movq    mm2, mm0
-    movq    mm6, [edx + 08H]
-    pmulhw  mm0, mm4
-    paddw   mm5, mm3
-    movq    mm3, [edx + 48H]
-    psubw   mm5, mm1
-    paddw   mm2, mm0
-    psubw   mm6, mm3
-    movq    mm0, mm6
-    pmulhw  mm6, mm4
-    paddw   mm3, mm3
-    paddw   mm1, mm1
-    paddw   mm3, mm0
-    paddw   mm1, mm5
-    pmulhw  mm4, mm3
-    paddw   mm6, mm0
-    psubw   mm6, mm2
-    paddw   mm2, mm2
-    movq    mm0, [edx + 18H]
-    paddw   mm2, mm6
-    paddw   mm4, mm3
-    psubw   mm2, mm1
-    paddw   mm2, [eax + 38H]
-    paddw   mm1, mm1
-    paddw   mm1, mm2
-    psraw   mm2, 4
-    psubw   mm4, mm7
-    psraw   mm1, 4
-    movq    mm3, [edx + 28H]
-    paddw   mm7, mm7
-    movq    [edx + 28H], mm2
-    paddw   mm7, mm4
-    movq    [edx + 18H], mm1
-    psubw   mm4, mm3
-    paddw   mm4, [eax + 38H]
-    paddw   mm3, mm3
-    paddw   mm3, mm4
-    psraw   mm4, 4
-    psubw   mm6, mm5
-    psraw   mm3, 4
-    paddw   mm6, [eax + 38H]
-    paddw   mm5, mm5
-    paddw   mm5, mm6
-    psraw   mm6, 4
-    movq    [edx + 48H], mm4
-    psraw   mm5, 4
-    movq    [edx + 38H], mm3
-    psubw   mm7, mm0
-    paddw   mm7, [eax + 38H]
-    paddw   mm0, mm0
-    paddw   mm0, mm7
-    psraw   mm7, 4
-    movq    [edx + 68H], mm6
-    psraw   mm0, 4
-    movq    [edx + 58H], mm5
-    movq    [edx + 78H], mm7
-    movq    [edx + 08H], mm0
-    /* emms  */
-  }
-}
-
-/*In-place MMX transform of the 8x8 block of 16-bit values at _y (all loads
-   and stores go to [edx+00H]..[edx+78H], i.e. 128 bytes); the multipliers
-   are read from OC_IDCT_CONSTS through eax.
-  The unrolled code consists of four passes:
-   1) the eight quadwords at 00H..38H are run through the add/subtract/multiply
-      network and written back interleaved by the punpck* sequence (which
-      looks like a 4x4 word transpose),
-   2) the same for the eight quadwords at 40H..78H,
-   3) the quadwords at 00H,10H,..,70H are run through the network again; each
-      result gets the bias at [eax+38H] (the row of 8s) added and is shifted
-      right arithmetically by 4 before being stored,
-   4) the same for the quadwords at 08H,18H,..,78H.
-  Only eax, edx and mm0..mm7 are used.
-  emms is left commented out at the end, so the MMX state is still active on
-   return.
-  NOTE(review): this body appears to be instruction-for-instruction the same
-   as oc_idct8x8_10_mmx() in this file -- confirm whether the duplication is
-   intended.*/
-void oc_idct8x8_mmx(ogg_int16_t _y[64]){
-  _asm {
-    mov     edx, [_y]
-    mov     eax, offset OC_IDCT_CONSTS
-    movq    mm2, [edx + 30H]
-    movq    mm6, [eax + 10H]
-    movq    mm4, mm2
-    movq    mm7, [edx + 18H]
-    pmulhw  mm4, mm6
-    movq    mm1, [eax + 20H]
-    pmulhw  mm6, mm7
-    movq    mm5, mm1
-    pmulhw  mm1, mm2
-    movq    mm3, [edx + 10H]
-    pmulhw  mm5, mm7
-    movq    mm0, [eax]
-    paddw   mm4, mm2
-    paddw   mm6, mm7
-    paddw   mm2, mm1
-    movq    mm1, [edx + 38H]
-    paddw   mm7, mm5
-    movq    mm5, mm0
-    pmulhw  mm0, mm3
-    paddw   mm4, mm7
-    pmulhw  mm5, mm1
-    movq    mm7, [eax + 30H]
-    psubw   mm6, mm2
-    paddw   mm0, mm3
-    pmulhw  mm3, mm7
-    movq    mm2, [edx + 20H]
-    pmulhw  mm7, mm1
-    paddw   mm5, mm1
-    movq    mm1, mm2
-    pmulhw  mm2, [eax + 08H]
-    psubw   mm3, mm5
-    movq    mm5, [edx + 28H]
-    paddw   mm0, mm7
-    movq    mm7, mm5
-    psubw   mm0, mm4
-    pmulhw  mm5, [eax + 08H]
-    paddw   mm2, mm1
-    pmulhw  mm1, [eax + 28H]
-    paddw   mm4, mm4
-    paddw   mm4, mm0
-    psubw   mm3, mm6
-    paddw   mm5, mm7
-    paddw   mm6, mm6
-    pmulhw  mm7, [eax + 28H]
-    paddw   mm6, mm3
-    movq    [edx + 10H], mm4
-    psubw   mm1, mm5
-    movq    mm4, [eax + 18H]
-    movq    mm5, mm3
-    pmulhw  mm3, mm4
-    paddw   mm7, mm2
-    movq    [edx + 20H], mm6
-    movq    mm2, mm0
-    movq    mm6, [edx]
-    pmulhw  mm0, mm4
-    paddw   mm5, mm3
-    movq    mm3, [edx + 08H]
-    psubw   mm5, mm1
-    paddw   mm2, mm0
-    psubw   mm6, mm3
-    movq    mm0, mm6
-    pmulhw  mm6, mm4
-    paddw   mm3, mm3
-    paddw   mm1, mm1
-    paddw   mm3, mm0
-    paddw   mm1, mm5
-    pmulhw  mm4, mm3
-    paddw   mm6, mm0
-    psubw   mm6, mm2
-    paddw   mm2, mm2
-    movq    mm0, [edx + 10H]
-    paddw   mm2, mm6
-    paddw   mm4, mm3
-    psubw   mm2, mm1
-    movq    mm3, [edx + 20H]
-    psubw   mm4, mm7
-    paddw   mm1, mm1
-    paddw   mm7, mm7
-    paddw   mm1, mm2
-    paddw   mm7, mm4
-    psubw   mm4, mm3
-    paddw   mm3, mm3
-    psubw   mm6, mm5
-    paddw   mm5, mm5
-    paddw   mm3, mm4
-    paddw   mm5, mm6
-    psubw   mm7, mm0
-    paddw   mm0, mm0
-    movq    [edx + 10H], mm1
-    paddw   mm0, mm7
-    movq    mm1, mm4
-    punpcklwd mm4, mm5
-    movq    [edx], mm0
-    punpckhwd mm1, mm5
-    movq    mm0, mm6
-    punpcklwd mm6, mm7
-    movq    mm5, mm4
-    punpckldq mm4, mm6
-    punpckhdq mm5, mm6
-    movq    mm6, mm1
-    movq    [edx + 08H], mm4
-    punpckhwd mm0, mm7
-    movq    [edx + 18H], mm5
-    punpckhdq mm6, mm0
-    movq    mm4, [edx]
-    punpckldq mm1, mm0
-    movq    mm5, [edx + 10H]
-    movq    mm0, mm4
-    movq    [edx + 38H], mm6
-    punpcklwd mm0, mm5
-    movq    [edx + 28H], mm1
-    punpckhwd mm4, mm5
-    movq    mm5, mm2
-    punpcklwd mm2, mm3
-    movq    mm1, mm0
-    punpckldq mm0, mm2
-    punpckhdq mm1, mm2
-    movq    mm2, mm4
-    movq    [edx], mm0
-    punpckhwd mm5, mm3
-    movq    [edx + 10H], mm1
-    punpckhdq mm4, mm5
-    punpckldq mm2, mm5
-    movq    [edx + 30H], mm4
-    movq    [edx + 20H], mm2
-    movq    mm2, [edx + 70H]
-    movq    mm6, [eax + 10H]
-    movq    mm4, mm2
-    movq    mm7, [edx + 58H]
-    pmulhw  mm4, mm6
-    movq    mm1, [eax + 20H]
-    pmulhw  mm6, mm7
-    movq    mm5, mm1
-    pmulhw  mm1, mm2
-    movq    mm3, [edx + 50H]
-    pmulhw  mm5, mm7
-    movq    mm0, [eax]
-    paddw   mm4, mm2
-    paddw   mm6, mm7
-    paddw   mm2, mm1
-    movq    mm1, [edx + 78H]
-    paddw   mm7, mm5
-    movq    mm5, mm0
-    pmulhw  mm0, mm3
-    paddw   mm4, mm7
-    pmulhw  mm5, mm1
-    movq    mm7, [eax + 30H]
-    psubw   mm6, mm2
-    paddw   mm0, mm3
-    pmulhw  mm3, mm7
-    movq    mm2, [edx + 60H]
-    pmulhw  mm7, mm1
-    paddw   mm5, mm1
-    movq    mm1, mm2
-    pmulhw  mm2, [eax + 08H]
-    psubw   mm3, mm5
-    movq    mm5, [edx + 68H]
-    paddw   mm0, mm7
-    movq    mm7, mm5
-    psubw   mm0, mm4
-    pmulhw  mm5, [eax + 08H]
-    paddw   mm2, mm1
-    pmulhw  mm1, [eax + 28H]
-    paddw   mm4, mm4
-    paddw   mm4, mm0
-    psubw   mm3, mm6
-    paddw   mm5, mm7
-    paddw   mm6, mm6
-    pmulhw  mm7, [eax + 28H]
-    paddw   mm6, mm3
-    movq    [edx + 50H], mm4
-    psubw   mm1, mm5
-    movq    mm4, [eax + 18H]
-    movq    mm5, mm3
-    pmulhw  mm3, mm4
-    paddw   mm7, mm2
-    movq    [edx + 60H], mm6
-    movq    mm2, mm0
-    movq    mm6, [edx + 40H]
-    pmulhw  mm0, mm4
-    paddw   mm5, mm3
-    movq    mm3, [edx + 48H]
-    psubw   mm5, mm1
-    paddw   mm2, mm0
-    psubw   mm6, mm3
-    movq    mm0, mm6
-    pmulhw  mm6, mm4
-    paddw   mm3, mm3
-    paddw   mm1, mm1
-    paddw   mm3, mm0
-    paddw   mm1, mm5
-    pmulhw  mm4, mm3
-    paddw   mm6, mm0
-    psubw   mm6, mm2
-    paddw   mm2, mm2
-    movq    mm0, [edx + 50H]
-    paddw   mm2, mm6
-    paddw   mm4, mm3
-    psubw   mm2, mm1
-    movq    mm3, [edx + 60H]
-    psubw   mm4, mm7
-    paddw   mm1, mm1
-    paddw   mm7, mm7
-    paddw   mm1, mm2
-    paddw   mm7, mm4
-    psubw   mm4, mm3
-    paddw   mm3, mm3
-    psubw   mm6, mm5
-    paddw   mm5, mm5
-    paddw   mm3, mm4
-    paddw   mm5, mm6
-    psubw   mm7, mm0
-    paddw   mm0, mm0
-    movq    [edx + 50H], mm1
-    paddw   mm0, mm7
-    movq    mm1, mm4
-    punpcklwd mm4, mm5
-    movq    [edx + 40H], mm0
-    punpckhwd mm1, mm5
-    movq    mm0, mm6
-    punpcklwd mm6, mm7
-    movq    mm5, mm4
-    punpckldq mm4, mm6
-    punpckhdq mm5, mm6
-    movq    mm6, mm1
-    movq    [edx + 48H], mm4
-    punpckhwd mm0, mm7
-    movq    [edx + 58H], mm5
-    punpckhdq mm6, mm0
-    movq    mm4, [edx + 40H]
-    punpckldq mm1, mm0
-    movq    mm5, [edx + 50H]
-    movq    mm0, mm4
-    movq    [edx + 78H], mm6
-    punpcklwd mm0, mm5
-    movq    [edx + 68H], mm1
-    punpckhwd mm4, mm5
-    movq    mm5, mm2
-    punpcklwd mm2, mm3
-    movq    mm1, mm0
-    punpckldq mm0, mm2
-    punpckhdq mm1, mm2
-    movq    mm2, mm4
-    movq    [edx + 40H], mm0
-    punpckhwd mm5, mm3
-    movq    [edx + 50H], mm1
-    punpckhdq mm4, mm5
-    punpckldq mm2, mm5
-    movq    [edx + 70H], mm4
-    movq    [edx + 60H], mm2
-    movq    mm2, [edx + 30H]
-    movq    mm6, [eax + 10H]
-    movq    mm4, mm2
-    movq    mm7, [edx + 50H]
-    pmulhw  mm4, mm6
-    movq    mm1, [eax + 20H]
-    pmulhw  mm6, mm7
-    movq    mm5, mm1
-    pmulhw  mm1, mm2
-    movq    mm3, [edx + 10H]
-    pmulhw  mm5, mm7
-    movq    mm0, [eax]
-    paddw   mm4, mm2
-    paddw   mm6, mm7
-    paddw   mm2, mm1
-    movq    mm1, [edx + 70H]
-    paddw   mm7, mm5
-    movq    mm5, mm0
-    pmulhw  mm0, mm3
-    paddw   mm4, mm7
-    pmulhw  mm5, mm1
-    movq    mm7, [eax + 30H]
-    psubw   mm6, mm2
-    paddw   mm0, mm3
-    pmulhw  mm3, mm7
-    movq    mm2, [edx + 20H]
-    pmulhw  mm7, mm1
-    paddw   mm5, mm1
-    movq    mm1, mm2
-    pmulhw  mm2, [eax + 08H]
-    psubw   mm3, mm5
-    movq    mm5, [edx + 60H]
-    paddw   mm0, mm7
-    movq    mm7, mm5
-    psubw   mm0, mm4
-    pmulhw  mm5, [eax + 08H]
-    paddw   mm2, mm1
-    pmulhw  mm1, [eax + 28H]
-    paddw   mm4, mm4
-    paddw   mm4, mm0
-    psubw   mm3, mm6
-    paddw   mm5, mm7
-    paddw   mm6, mm6
-    pmulhw  mm7, [eax + 28H]
-    paddw   mm6, mm3
-    movq    [edx + 10H], mm4
-    psubw   mm1, mm5
-    movq    mm4, [eax + 18H]
-    movq    mm5, mm3
-    pmulhw  mm3, mm4
-    paddw   mm7, mm2
-    movq    [edx + 20H], mm6
-    movq    mm2, mm0
-    movq    mm6, [edx]
-    pmulhw  mm0, mm4
-    paddw   mm5, mm3
-    movq    mm3, [edx + 40H]
-    psubw   mm5, mm1
-    paddw   mm2, mm0
-    psubw   mm6, mm3
-    movq    mm0, mm6
-    pmulhw  mm6, mm4
-    paddw   mm3, mm3
-    paddw   mm1, mm1
-    paddw   mm3, mm0
-    paddw   mm1, mm5
-    pmulhw  mm4, mm3
-    paddw   mm6, mm0
-    psubw   mm6, mm2
-    paddw   mm2, mm2
-    movq    mm0, [edx + 10H]
-    paddw   mm2, mm6
-    paddw   mm4, mm3
-    psubw   mm2, mm1
-    paddw   mm2, [eax + 38H]
-    paddw   mm1, mm1
-    paddw   mm1, mm2
-    psraw   mm2, 4
-    psubw   mm4, mm7
-    psraw   mm1, 4
-    movq    mm3, [edx + 20H]
-    paddw   mm7, mm7
-    movq    [edx + 20H], mm2
-    paddw   mm7, mm4
-    movq    [edx + 10H], mm1
-    psubw   mm4, mm3
-    paddw   mm4, [eax + 38H]
-    paddw   mm3, mm3
-    paddw   mm3, mm4
-    psraw   mm4, 4
-    psubw   mm6, mm5
-    psraw   mm3, 4
-    paddw   mm6, [eax + 38H]
-    paddw   mm5, mm5
-    paddw   mm5, mm6
-    psraw   mm6, 4
-    movq    [edx + 40H], mm4
-    psraw   mm5, 4
-    movq    [edx + 30H], mm3
-    psubw   mm7, mm0
-    paddw   mm7, [eax + 38H]
-    paddw   mm0, mm0
-    paddw   mm0, mm7
-    psraw   mm7, 4
-    movq    [edx + 60H], mm6
-    psraw   mm0, 4
-    movq    [edx + 50H], mm5
-    movq    [edx + 70H], mm7
-    movq    [edx], mm0
-    movq    mm2, [edx + 38H]
-    movq    mm6, [eax + 10H]
-    movq    mm4, mm2
-    movq    mm7, [edx + 58H]
-    pmulhw  mm4, mm6
-    movq    mm1, [eax + 20H]
-    pmulhw  mm6, mm7
-    movq    mm5, mm1
-    pmulhw  mm1, mm2
-    movq    mm3, [edx + 18H]
-    pmulhw  mm5, mm7
-    movq    mm0, [eax]
-    paddw   mm4, mm2
-    paddw   mm6, mm7
-    paddw   mm2, mm1
-    movq    mm1, [edx + 78H]
-    paddw   mm7, mm5
-    movq    mm5, mm0
-    pmulhw  mm0, mm3
-    paddw   mm4, mm7
-    pmulhw  mm5, mm1
-    movq    mm7, [eax + 30H]
-    psubw   mm6, mm2
-    paddw   mm0, mm3
-    pmulhw  mm3, mm7
-    movq    mm2, [edx + 28H]
-    pmulhw  mm7, mm1
-    paddw   mm5, mm1
-    movq    mm1, mm2
-    pmulhw  mm2, [eax + 08H]
-    psubw   mm3, mm5
-    movq    mm5, [edx + 68H]
-    paddw   mm0, mm7
-    movq    mm7, mm5
-    psubw   mm0, mm4
-    pmulhw  mm5, [eax + 08H]
-    paddw   mm2, mm1
-    pmulhw  mm1, [eax + 28H]
-    paddw   mm4, mm4
-    paddw   mm4, mm0
-    psubw   mm3, mm6
-    paddw   mm5, mm7
-    paddw   mm6, mm6
-    pmulhw  mm7, [eax + 28H]
-    paddw   mm6, mm3
-    movq    [edx + 18H], mm4
-    psubw   mm1, mm5
-    movq    mm4, [eax + 18H]
-    movq    mm5, mm3
-    pmulhw  mm3, mm4
-    paddw   mm7, mm2
-    movq    [edx + 28H], mm6
-    movq    mm2, mm0
-    movq    mm6, [edx + 08H]
-    pmulhw  mm0, mm4
-    paddw   mm5, mm3
-    movq    mm3, [edx + 48H]
-    psubw   mm5, mm1
-    paddw   mm2, mm0
-    psubw   mm6, mm3
-    movq    mm0, mm6
-    pmulhw  mm6, mm4
-    paddw   mm3, mm3
-    paddw   mm1, mm1
-    paddw   mm3, mm0
-    paddw   mm1, mm5
-    pmulhw  mm4, mm3
-    paddw   mm6, mm0
-    psubw   mm6, mm2
-    paddw   mm2, mm2
-    movq    mm0, [edx + 18H]
-    paddw   mm2, mm6
-    paddw   mm4, mm3
-    psubw   mm2, mm1
-    paddw   mm2, [eax + 38H]
-    paddw   mm1, mm1
-    paddw   mm1, mm2
-    psraw   mm2, 4
-    psubw   mm4, mm7
-    psraw   mm1, 4
-    movq    mm3, [edx + 28H]
-    paddw   mm7, mm7
-    movq    [edx + 28H], mm2
-    paddw   mm7, mm4
-    movq    [edx + 18H], mm1
-    psubw   mm4, mm3
-    paddw   mm4, [eax + 38H]
-    paddw   mm3, mm3
-    paddw   mm3, mm4
-    psraw   mm4, 4
-    psubw   mm6, mm5
-    psraw   mm3, 4
-    paddw   mm6, [eax + 38H]
-    paddw   mm5, mm5
-    paddw   mm5, mm6
-    psraw   mm6, 4
-    movq    [edx + 48H], mm4
-    psraw   mm5, 4
-    movq    [edx + 38H], mm3
-    psubw   mm7, mm0
-    paddw   mm7, [eax + 38H]
-    paddw   mm0, mm0
-    paddw   mm0, mm7
-    psraw   mm7, 4
-    movq    [edx + 68H], mm6
-    psraw   mm0, 4
-    movq    [edx + 58H], mm5
-    movq    [edx + 78H], mm7
-    movq    [edx + 08H], mm0
-    /* emms  */
-  }
-}
-
-#endif

+ 0 - 377
Engine/lib/libtheora/lib/dec/x86_vc/mmxloopfilter.c

@@ -1,377 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id:
-
- ********************************************************************/
-
-/* -------------------------------------------------------------------
-  MMX based loop filter for the theora codec.
-
-  Originally written by Rudolf Marek, based on code from On2's VP3.
-  Converted to Visual Studio inline assembly by Nils Pipenbrinck.
-
-  Note: I can't test these since my example files never get into the
-  loop filters, but the code has been converted semi-automatic from
-  the GCC sources, so it ought to work.
-  ---------------------------------------------------------------------*/
-#include "../../internal.h"
-#include "x86int.h"
-#include <mmintrin.h>
-
-#if defined(USE_ASM)
-
-
-
-static void loop_filter_v(unsigned char *_pix,int _ystride,
-                          const ogg_int16_t *_ll){
-  _asm {
-    mov       eax,  [_pix]
-    mov       edx,  [_ystride]
-    mov       ebx,  [_ll]
-
-    /* _pix -= ystride */
-    sub       eax,   edx
-    /*  mm0=0          */
-    pxor      mm0,   mm0
-    /* _pix -= ystride */
-    sub       eax,   edx
-    /*  esi=_ystride*3 */
-    lea       esi, [edx + edx*2]
-
-    /*  mm7=_pix[0...8]*/
-    movq      mm7, [eax]
-    /*  mm4=_pix[0...8+_ystride*3]*/
-    movq      mm4, [eax + esi]
-    /*  mm6=_pix[0...8]*/
-    movq      mm6, mm7
-    /*  Expand unsigned _pix[0...3] to 16 bits.*/
-    punpcklbw mm6, mm0
-    movq      mm5, mm4
-    /*  Expand unsigned _pix[4...7] to 16 bits.*/
-    punpckhbw mm7, mm0
-    punpcklbw mm4, mm0
-    /*  Expand other arrays too.*/
-    punpckhbw mm5, mm0
-    /*mm7:mm6=_p[0...7]-_p[0...7+_ystride*3]:*/
-    psubw     mm6, mm4
-    psubw     mm7, mm5
-    /*mm5=mm4=_pix[0...7+_ystride]*/
-    movq      mm4, [eax + edx]
-    /*mm1=mm3=mm2=_pix[0..7]+_ystride*2]*/
-    movq      mm2, [eax + edx*2]
-    movq      mm5, mm4
-    movq      mm3, mm2
-    movq      mm1, mm2
-    /*Expand these arrays.*/
-    punpckhbw mm5, mm0
-    punpcklbw mm4, mm0
-    punpckhbw mm3, mm0
-    punpcklbw mm2, mm0
-    pcmpeqw   mm0, mm0
-    /*mm0=3 3 3 3
-    mm3:mm2=_pix[0...8+_ystride*2]-_pix[0...8+_ystride]*/
-    psubw     mm3, mm5
-    psrlw     mm0, 14
-    psubw     mm2, mm4
-    /*Scale by 3.*/
-    pmullw    mm3, mm0
-    pmullw    mm2, mm0
-    /*mm0=4 4 4 4
-    f=mm3:mm2==_pix[0...8]-_pix[0...8+_ystride*3]+
-     3*(_pix[0...8+_ystride*2]-_pix[0...8+_ystride])*/
-    psrlw     mm0, 1
-    paddw     mm3, mm7
-    psllw     mm0, 2
-    paddw     mm2, mm6
-    /*Add 4.*/
-    paddw     mm3, mm0
-    paddw     mm2, mm0
-    /*"Divide" by 8.*/
-    psraw     mm3, 3
-    psraw     mm2, 3
-    /*Now compute lflim of mm3:mm2 cf. Section 7.10 of the sepc.*/
-    /*Free up mm5.*/
-    packuswb  mm4, mm5
-    /*mm0=L L L L*/
-    movq      mm0, [ebx]
-    /*if(R_i<-2L||R_i>2L)R_i=0:*/
-    movq      mm5, mm2
-    pxor      mm6, mm6
-    movq      mm7, mm0
-    psubw     mm6, mm0
-    psllw     mm7, 1
-    psllw     mm6, 1
-    /*mm2==R_3 R_2 R_1 R_0*/
-    /*mm5==R_3 R_2 R_1 R_0*/
-    /*mm6==-2L -2L -2L -2L*/
-    /*mm7==2L 2L 2L 2L*/
-    pcmpgtw   mm7, mm2
-    pcmpgtw   mm5, mm6
-    pand      mm2, mm7
-    movq      mm7, mm0
-    pand      mm2, mm5
-    psllw     mm7, 1
-    movq      mm5, mm3
-    /*mm3==R_7 R_6 R_5 R_4*/
-    /*mm5==R_7 R_6 R_5 R_4*/
-    /*mm6==-2L -2L -2L -2L*/
-    /*mm7==2L 2L 2L 2L*/
-    pcmpgtw   mm7, mm3
-    pcmpgtw   mm5, mm6
-    pand      mm3, mm7
-    movq      mm7, mm0
-    pand      mm3, mm5
-   /*if(R_i<-L)R_i'=R_i+2L;
-     if(R_i>L)R_i'=R_i-2L;
-     if(R_i<-L||R_i>L)R_i=-R_i':*/
-    psraw     mm6, 1
-    movq      mm5, mm2
-    psllw     mm7, 1
-    /*mm2==R_3 R_2 R_1 R_0*/
-    /*mm5==R_3 R_2 R_1 R_0*/
-    /*mm6==-L -L -L -L*/
-    /*mm0==L L L L*/
-    /*mm5=R_i>L?FF:00*/
-    pcmpgtw   mm5, mm0
-    /*mm6=-L>R_i?FF:00*/
-    pcmpgtw   mm6, mm2
-    /*mm7=R_i>L?2L:0*/
-    pand      mm7, mm5
-    /*mm2=R_i>L?R_i-2L:R_i*/
-    psubw     mm2, mm7
-    movq      mm7, mm0
-    /*mm5=-L>R_i||R_i>L*/
-    por       mm5, mm6
-    psllw     mm7, 1
-    /*mm7=-L>R_i?2L:0*/
-    pand      mm7, mm6
-    pxor      mm6, mm6
-    /*mm2=-L>R_i?R_i+2L:R_i*/
-    paddw     mm2, mm7
-    psubw     mm6, mm0
-    /*mm5=-L>R_i||R_i>L?-R_i':0*/
-    pand      mm5, mm2
-    movq      mm7, mm0
-    /*mm2=-L>R_i||R_i>L?0:R_i*/
-    psubw     mm2, mm5
-    psllw     mm7, 1
-    /*mm2=-L>R_i||R_i>L?-R_i':R_i*/
-    psubw     mm2, mm5
-    movq      mm5, mm3
-    /*mm3==R_7 R_6 R_5 R_4*/
-    /*mm5==R_7 R_6 R_5 R_4*/
-    /*mm6==-L -L -L -L*/
-    /*mm0==L L L L*/
-    /*mm6=-L>R_i?FF:00*/
-    pcmpgtw   mm6, mm3
-    /*mm5=R_i>L?FF:00*/
-    pcmpgtw   mm5, mm0
-    /*mm7=R_i>L?2L:0*/
-    pand      mm7, mm5
-    /*mm2=R_i>L?R_i-2L:R_i*/
-    psubw     mm3, mm7
-    psllw     mm0, 1
-    /*mm5=-L>R_i||R_i>L*/
-    por       mm5, mm6
-    /*mm0=-L>R_i?2L:0*/
-    pand      mm0, mm6
-    /*mm3=-L>R_i?R_i+2L:R_i*/
-    paddw     mm3, mm0
-    /*mm5=-L>R_i||R_i>L?-R_i':0*/
-    pand      mm5, mm3
-    /*mm2=-L>R_i||R_i>L?0:R_i*/
-    psubw     mm3, mm5
-    /*mm3=-L>R_i||R_i>L?-R_i':R_i*/
-    psubw     mm3, mm5
-    /*Unfortunately, there's no unsigned byte+signed byte with unsigned
-       saturation op code, so we have to promote things back 16 bits.*/
-    pxor      mm0, mm0
-    movq      mm5, mm4
-    punpcklbw mm4, mm0
-    punpckhbw mm5, mm0
-    movq      mm6, mm1
-    punpcklbw mm1, mm0
-    punpckhbw mm6, mm0
-    /*_pix[0...8+_ystride]+=R_i*/
-    paddw     mm4, mm2
-    paddw     mm5, mm3
-    /*_pix[0...8+_ystride*2]-=R_i*/
-    psubw     mm1, mm2
-    psubw     mm6, mm3
-    packuswb  mm4, mm5
-    packuswb  mm1, mm6
-    /*Write it back out.*/
-    movq    [eax + edx], mm4
-    movq    [eax + edx*2], mm1
-  }
-}
-
-/*This code implements the bulk of loop_filter_h().
-  Data are striped p0 p1 p2 p3 ... p0 p1 p2 p3 ..., so in order to load all
-   four p0's to one register we must transpose the values in four mmx regs.
-  When half is done we repeat this for the rest.*/
-static void loop_filter_h4(unsigned char *_pix,long _ystride,
-                           const ogg_int16_t *_ll){
-  /* todo: merge the comments from the GCC sources */
-  _asm {
-    mov   ecx, [_pix]
-    mov   edx, [_ystride]
-    mov   eax, [_ll]
-    /*esi=_ystride*3*/
-    lea     esi, [edx + edx*2]
-
-    movd    mm0, dword ptr [ecx]
-    movd    mm1, dword ptr [ecx + edx]
-    movd    mm2, dword ptr [ecx + edx*2]
-    movd    mm3, dword ptr [ecx + esi]
-    punpcklbw mm0, mm1
-    punpcklbw mm2, mm3
-    movq    mm1, mm0
-    punpckhwd mm0, mm2
-    punpcklwd mm1, mm2
-    pxor    mm7, mm7
-    movq    mm5, mm1
-    punpcklbw mm1, mm7
-    punpckhbw mm5, mm7
-    movq    mm3, mm0
-    punpcklbw mm0, mm7
-    punpckhbw mm3, mm7
-    psubw   mm1, mm3
-    movq    mm4, mm0
-    pcmpeqw mm2, mm2
-    psubw   mm0, mm5
-    psrlw   mm2, 14
-    pmullw  mm0, mm2
-    psrlw   mm2, 1
-    paddw   mm0, mm1
-    psllw   mm2, 2
-    paddw   mm0, mm2
-    psraw   mm0, 3
-    movq    mm6, qword ptr [eax]
-    movq    mm1, mm0
-    pxor    mm2, mm2
-    movq    mm3, mm6
-    psubw   mm2, mm6
-    psllw   mm3, 1
-    psllw   mm2, 1
-    pcmpgtw mm3, mm0
-    pcmpgtw mm1, mm2
-    pand    mm0, mm3
-    pand    mm0, mm1
-    psraw   mm2, 1
-    movq    mm1, mm0
-    movq    mm3, mm6
-    pcmpgtw mm2, mm0
-    pcmpgtw mm1, mm6
-    psllw   mm3, 1
-    psllw   mm6, 1
-    pand    mm3, mm1
-    pand    mm6, mm2
-    psubw   mm0, mm3
-    por     mm1, mm2
-    paddw   mm0, mm6
-    pand    mm1, mm0
-    psubw   mm0, mm1
-    psubw   mm0, mm1
-    paddw   mm5, mm0
-    psubw   mm4, mm0
-    packuswb mm5, mm7
-    packuswb mm4, mm7
-    punpcklbw mm5, mm4
-    movd    edi, mm5
-    mov     word ptr [ecx + 01H], di
-    psrlq   mm5, 32
-    shr     edi, 16
-    mov     word ptr [ecx + edx + 01H], di
-    movd    edi, mm5
-    mov     word ptr [ecx + edx*2 + 01H], di
-    shr     edi, 16
-    mov     word ptr [ecx + esi + 01H], di
-  }
-}
-
-static void loop_filter_h(unsigned char *_pix,int _ystride,
-                          const ogg_int16_t *_ll){
-  _pix-=2;
-  loop_filter_h4(_pix,_ystride,_ll);
-  loop_filter_h4(_pix+(_ystride<<2),_ystride,_ll);
-}
-
-
-/*We copy the whole function because the MMX routines will be inlined 4 times,
-   and we can do just a single emms call at the end this way.
-  We also do not use the _bv lookup table, instead computing the values that
-   would lie in it on the fly.*/
-
-/*Apply the loop filter to a given set of fragment rows in the given plane.
-  The filter may be run on the bottom edge, affecting pixels in the next row of
-   fragments, so this row also needs to be available.
-  _bv:        The bounding values array.
-  _refi:      The index of the frame buffer to filter.
-  _pli:       The color plane to filter.
-  _fragy0:    The Y coordinate of the first fragment row to filter.
-  _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
-void oc_state_loop_filter_frag_rows_mmx(oc_theora_state *_state,int *_bv,
- int _refi,int _pli,int _fragy0,int _fragy_end){
-  ogg_int16_t __declspec(align(8))        ll[4];
-  th_img_plane                            *iplane;
-  oc_fragment_plane                       *fplane;
-  oc_fragment                             *frag_top;
-  oc_fragment                             *frag0;
-  oc_fragment                             *frag;
-  oc_fragment                             *frag_end;
-  oc_fragment                             *frag0_end;
-  oc_fragment                             *frag_bot;
-  ll[0]=ll[1]=ll[2]=ll[3]=
-   (ogg_int16_t)_state->loop_filter_limits[_state->qis[0]];
-  iplane=_state->ref_frame_bufs[_refi]+_pli;
-  fplane=_state->fplanes+_pli;
-  /*The following loops are constructed somewhat non-intuitively on purpose.
-    The main idea is: if a block boundary has at least one coded fragment on
-     it, the filter is applied to it.
-    However, the order that the filters are applied in matters, and VP3 chose
-     the somewhat strange ordering used below.*/
-  frag_top=_state->frags+fplane->froffset;
-  frag0=frag_top+_fragy0*fplane->nhfrags;
-  frag0_end=frag0+(_fragy_end-_fragy0)*fplane->nhfrags;
-  frag_bot=_state->frags+fplane->froffset+fplane->nfrags;
-  while(frag0<frag0_end){
-    frag=frag0;
-    frag_end=frag+fplane->nhfrags;
-    while(frag<frag_end){
-      if(frag->coded){
-        if(frag>frag0){
-          loop_filter_h(frag->buffer[_refi],iplane->stride,ll);
-        }
-        if(frag0>frag_top){
-          loop_filter_v(frag->buffer[_refi],iplane->stride,ll);
-        }
-        if(frag+1<frag_end&&!(frag+1)->coded){
-          loop_filter_h(frag->buffer[_refi]+8,iplane->stride,ll);
-        }
-        if(frag+fplane->nhfrags<frag_bot&&!(frag+fplane->nhfrags)->coded){
-          loop_filter_v((frag+fplane->nhfrags)->buffer[_refi],
-           iplane->stride,ll);
-        }
-      }
-      frag++;
-    }
-    frag0+=fplane->nhfrags;
-  }
-
-  /*This needs to be removed when decode specific functions are implemented:*/
-  _mm_empty();
-}
-
-#endif

+ 0 - 189
Engine/lib/libtheora/lib/dec/x86_vc/mmxstate.c

@@ -1,189 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: mmxstate.c 15400 2008-10-15 12:10:58Z tterribe $
-
- ********************************************************************/
-
-/* ------------------------------------------------------------------------
-  MMX acceleration of complete fragment reconstruction algorithm.
-    Originally written by Rudolf Marek.
-
-  Conversion to MSC intrinsics by Nils Pipenbrinck.
-  ---------------------------------------------------------------------*/
-#if defined(USE_ASM)
-
-#include "../../internal.h"
-#include "../idct.h"
-#include "x86int.h"
-#include <mmintrin.h>
-
-static const unsigned char OC_FZIG_ZAGMMX[64]=
-{
-   0, 8, 1, 2, 9,16,24,17,
-  10, 3,32,11,18,25, 4,12,
-   5,26,19,40,33,34,41,48,
-  27, 6,13,20,28,21,14, 7,
-  56,49,42,35,43,50,57,36,
-  15,22,29,30,23,44,37,58,
-  51,59,38,45,52,31,60,53,
-  46,39,47,54,61,62,55,63
-};
-
-/* Fill a block with value */
-static __inline void loc_fill_mmx_value (__m64 * _dst, __m64 _value){
-  __m64 t   = _value;
-  _dst[0]  = t;  _dst[1]  = t;  _dst[2]  = t;  _dst[3]  = t;
-  _dst[4]  = t;  _dst[5]  = t;  _dst[6]  = t;  _dst[7]  = t;
-  _dst[8]  = t;  _dst[9]  = t;  _dst[10] = t;  _dst[11] = t;
-  _dst[12] = t;  _dst[13] = t;  _dst[14] = t;  _dst[15] = t;
-}
-
-/* copy a block of 8 byte elements using different strides */
-static __inline void loc_blockcopy_mmx (unsigned char * _dst, int _dst_ystride,
-                                        unsigned char * _src, int _src_ystride){
-  __m64 a,b,c,d,e,f,g,h;
-  a = *(__m64*)(_src + 0 * _src_ystride);
-  b = *(__m64*)(_src + 1 * _src_ystride);
-  c = *(__m64*)(_src + 2 * _src_ystride);
-  d = *(__m64*)(_src + 3 * _src_ystride);
-  e = *(__m64*)(_src + 4 * _src_ystride);
-  f = *(__m64*)(_src + 5 * _src_ystride);
-  g = *(__m64*)(_src + 6 * _src_ystride);
-  h = *(__m64*)(_src + 7 * _src_ystride);
-  *(__m64*)(_dst + 0 * _dst_ystride) = a;
-  *(__m64*)(_dst + 1 * _dst_ystride) = b;
-  *(__m64*)(_dst + 2 * _dst_ystride) = c;
-  *(__m64*)(_dst + 3 * _dst_ystride) = d;
-  *(__m64*)(_dst + 4 * _dst_ystride) = e;
-  *(__m64*)(_dst + 5 * _dst_ystride) = f;
-  *(__m64*)(_dst + 6 * _dst_ystride) = g;
-  *(__m64*)(_dst + 7 * _dst_ystride) = h;
-}
-
-void oc_state_frag_recon_mmx(oc_theora_state *_state,const oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
- ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]){
-  ogg_int16_t __declspec(align(16)) res_buf[64];
-  int dst_framei;
-  int dst_ystride;
-  int zzi;
-  /*_last_zzi is subtly different from an actual count of the number of
-     coefficients we decoded for this block.
-    It contains the value of zzi BEFORE the final token in the block was
-     decoded.
-    In most cases this is an EOB token (the continuation of an EOB run from a
-     previous block counts), and so this is the same as the coefficient count.
-    However, in the case that the last token was NOT an EOB token, but filled
-     the block up with exactly 64 coefficients, _last_zzi will be less than 64.
-    Provided the last token was not a pure zero run, the minimum value it can
-     be is 46, and so that doesn't affect any of the cases in this routine.
-    However, if the last token WAS a pure zero run of length 63, then _last_zzi
-     will be 1 while the number of coefficients decoded is 64.
-    Thus, we will trigger the following special case, where the real
-     coefficient count would not.
-    Note also that a zero run of length 64 will give _last_zzi a value of 0,
-     but we still process the DC coefficient, which might have a non-zero value
-     due to DC prediction.
-    Although convoluted, this is arguably the correct behavior: it allows us to
-     dequantize fewer coefficients and use a smaller transform when the block
-     ends with a long zero run instead of a normal EOB token.
-    It could be smarter... multiple separate zero runs at the end of a block
-     will fool it, but an encoder that generates these really deserves what it
-     gets.
-    Needless to say we inherited this approach from VP3.*/
-  /*Special case only having a DC component.*/
-  if(_last_zzi<2){
-    __m64 p;
-    /*Why is the iquant product rounded in this case and no others? Who knows.*/
-    p = _m_from_int((ogg_int32_t)_frag->dc*_dc_iquant+15>>5);
-    /* broadcast 16 bits into all 4 mmx subregisters */
-    p = _m_punpcklwd (p,p);
-    p = _m_punpckldq (p,p);
-    loc_fill_mmx_value ((__m64 *)res_buf, p);
-  }
-  else{
-    /*Then, fill in the remainder of the coefficients with 0's, and perform
-       the iDCT.*/
-    /*First zero the buffer.*/
-    /*On K7, etc., this could be replaced with movntq and sfence.*/
-    loc_fill_mmx_value ((__m64 *)res_buf, _mm_setzero_si64());
-
-    res_buf[0]=(ogg_int16_t)((ogg_int32_t)_frag->dc*_dc_iquant);
-    /*This is planned to be rewritten in MMX.*/
-    for(zzi=1;zzi<_ncoefs;zzi++)
-    {
-      int ci;
-      ci=OC_FZIG_ZAG[zzi];
-      res_buf[OC_FZIG_ZAGMMX[zzi]]=(ogg_int16_t)((ogg_int32_t)_dct_coeffs[zzi]*
-       _ac_iquant[ci]);
-    }
-
-    if(_last_zzi<10){
-      oc_idct8x8_10_mmx(res_buf);
-    }
-    else {
-      oc_idct8x8_mmx(res_buf);
-    }
-  }
-  /*Fill in the target buffer.*/
-  dst_framei=_state->ref_frame_idx[OC_FRAME_SELF];
-  dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
-  /*For now ystride values in all ref frames assumed to be equal.*/
-  if(_frag->mbmode==OC_MODE_INTRA){
-    oc_frag_recon_intra_mmx(_frag->buffer[dst_framei],dst_ystride,res_buf);
-  }
-  else{
-    int ref_framei;
-    int ref_ystride;
-    int mvoffsets[2];
-    ref_framei=_state->ref_frame_idx[OC_FRAME_FOR_MODE[_frag->mbmode]];
-    ref_ystride=_state->ref_frame_bufs[ref_framei][_pli].stride;
-    if(oc_state_get_mv_offsets(_state,mvoffsets,_frag->mv[0],
-     _frag->mv[1],ref_ystride,_pli)>1){
-      oc_frag_recon_inter2_mmx(_frag->buffer[dst_framei],dst_ystride,
-       _frag->buffer[ref_framei]+mvoffsets[0],ref_ystride,
-       _frag->buffer[ref_framei]+mvoffsets[1],ref_ystride,res_buf);
-    }
-    else{
-      oc_frag_recon_inter_mmx(_frag->buffer[dst_framei],dst_ystride,
-       _frag->buffer[ref_framei]+mvoffsets[0],ref_ystride,res_buf);
-    }
-  }
-
-  _mm_empty();
-}
-
-
-void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
- int _nfragis,int _dst_frame,int _src_frame,int _pli){
-  const int *fragi;
-  const int *fragi_end;
-  int        dst_framei;
-  int        dst_ystride;
-  int        src_framei;
-  int        src_ystride;
-  dst_framei=_state->ref_frame_idx[_dst_frame];
-  src_framei=_state->ref_frame_idx[_src_frame];
-  dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
-  src_ystride=_state->ref_frame_bufs[src_framei][_pli].stride;
-  fragi_end=_fragis+_nfragis;
-  for(fragi=_fragis;fragi<fragi_end;fragi++){
-    oc_fragment *frag = _state->frags+*fragi;
-    loc_blockcopy_mmx (frag->buffer[dst_framei], dst_ystride,
-                       frag->buffer[src_framei], src_ystride);
-  }
-  _m_empty();
-}
-
-#endif

+ 0 - 49
Engine/lib/libtheora/lib/dec/x86_vc/x86int.h

@@ -1,49 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: x86int.h 15400 2008-10-15 12:10:58Z tterribe $
-
- ********************************************************************/
-
-#if !defined(_x86_x86int_vc_H)
-# define _x86_x86int_vc_H (1)
-# include "../../internal.h"
-
-void oc_state_vtable_init_x86(oc_theora_state *_state);
-
-void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride,
- const ogg_int16_t *_residue);
-
-void oc_frag_recon_inter_mmx(unsigned char *_dst,int _dst_ystride,
- const unsigned char *_src,int _src_ystride,const ogg_int16_t *_residue);
-
-void oc_frag_recon_inter2_mmx(unsigned char *_dst,int _dst_ystride,
- const unsigned char *_src1,int _src1_ystride,const unsigned char *_src2,
- int _src2_ystride,const ogg_int16_t *_residue);
-
-void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
- int _nfragis,int _dst_frame,int _src_frame,int _pli);
-
-void oc_restore_fpu_mmx(void);
-
-void oc_state_frag_recon_mmx(oc_theora_state *_state,const oc_fragment *_frag,                                               
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,                                                             
- ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]);
-
-void oc_idct8x8_mmx(ogg_int16_t _y[64]);
-void oc_idct8x8_10_mmx(ogg_int16_t _y[64]);
-
-void oc_state_loop_filter_frag_rows_mmx(oc_theora_state *_state,int *_bv,                                                    
-  int _refi,int _pli,int _fragy0,int _fragy_end);
-
-#endif

+ 9 - 4
Engine/lib/libtheora/lib/dec/decapiwrapper.c → Engine/lib/libtheora/lib/decapiwrapper.c

@@ -5,7 +5,7 @@
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  *                                                                  *
  *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  *                                                                  *
  *                                                                  *
  ********************************************************************
  ********************************************************************
@@ -19,6 +19,7 @@
 #include <string.h>
 #include <string.h>
 #include <limits.h>
 #include <limits.h>
 #include "apiwrapper.h"
 #include "apiwrapper.h"
+#include "decint.h"
 #include "theora/theoradec.h"
 #include "theora/theoradec.h"
 
 
 static void th_dec_api_clear(th_api_wrapper *_api){
 static void th_dec_api_clear(th_api_wrapper *_api){
@@ -47,7 +48,7 @@ static double theora_decode_granule_time(theora_state *_td,ogg_int64_t _gp){
   return th_granule_time(((th_api_wrapper *)_td->i->codec_setup)->decode,_gp);
   return th_granule_time(((th_api_wrapper *)_td->i->codec_setup)->decode,_gp);
 }
 }
 
 
-static const oc_state_dispatch_vtbl OC_DEC_DISPATCH_VTBL={
+static const oc_state_dispatch_vtable OC_DEC_DISPATCH_VTBL={
   (oc_state_clear_func)theora_decode_clear,
   (oc_state_clear_func)theora_decode_clear,
   (oc_state_control_func)theora_decode_control,
   (oc_state_control_func)theora_decode_control,
   (oc_state_granule_frame_func)theora_decode_granule_frame,
   (oc_state_granule_frame_func)theora_decode_granule_frame,
@@ -95,6 +96,7 @@ int theora_decode_init(theora_state *_td,theora_info *_ci){
     This avoids having to figure out whether or not we need to free the info
     This avoids having to figure out whether or not we need to free the info
      struct in either theora_info_clear() or theora_clear().*/
      struct in either theora_info_clear() or theora_clear().*/
   apiinfo=(th_api_info *)_ogg_calloc(1,sizeof(*apiinfo));
   apiinfo=(th_api_info *)_ogg_calloc(1,sizeof(*apiinfo));
+  if(apiinfo==NULL)return OC_FAULT;
   /*Make our own copy of the info struct, since its lifetime should be
   /*Make our own copy of the info struct, since its lifetime should be
      independent of the one we were passed in.*/
      independent of the one we were passed in.*/
   *&apiinfo->info=*_ci;
   *&apiinfo->info=*_ci;
@@ -130,6 +132,7 @@ int theora_decode_header(theora_info *_ci,theora_comment *_cc,ogg_packet *_op){
      theora_info struct like the ones that are used in a theora_state struct.*/
      theora_info struct like the ones that are used in a theora_state struct.*/
   if(api==NULL){
   if(api==NULL){
     _ci->codec_setup=_ogg_calloc(1,sizeof(*api));
     _ci->codec_setup=_ogg_calloc(1,sizeof(*api));
+    if(_ci->codec_setup==NULL)return OC_FAULT;
     api=(th_api_wrapper *)_ci->codec_setup;
     api=(th_api_wrapper *)_ci->codec_setup;
     api->clear=(oc_setup_clear_func)th_dec_api_clear;
     api->clear=(oc_setup_clear_func)th_dec_api_clear;
   }
   }
@@ -167,12 +170,14 @@ int theora_decode_packetin(theora_state *_td,ogg_packet *_op){
 
 
 int theora_decode_YUVout(theora_state *_td,yuv_buffer *_yuv){
 int theora_decode_YUVout(theora_state *_td,yuv_buffer *_yuv){
   th_api_wrapper  *api;
   th_api_wrapper  *api;
+  th_dec_ctx      *decode;
   th_ycbcr_buffer  buf;
   th_ycbcr_buffer  buf;
   int              ret;
   int              ret;
   if(!_td||!_td->i||!_td->i->codec_setup)return OC_FAULT;
   if(!_td||!_td->i||!_td->i->codec_setup)return OC_FAULT;
   api=(th_api_wrapper *)_td->i->codec_setup;
   api=(th_api_wrapper *)_td->i->codec_setup;
-  if(!api->decode)return OC_FAULT;
-  ret=th_decode_ycbcr_out(api->decode,buf);
+  decode=(th_dec_ctx *)api->decode;
+  if(!decode)return OC_FAULT;
+  ret=th_decode_ycbcr_out(decode,buf);
   if(ret>=0){
   if(ret>=0){
     _yuv->y_width=buf[0].width;
     _yuv->y_width=buf[0].width;
     _yuv->y_height=buf[0].height;
     _yuv->y_height=buf[0].height;

+ 52 - 45
Engine/lib/libtheora/lib/dec/decinfo.c → Engine/lib/libtheora/lib/decinfo.c

@@ -5,13 +5,13 @@
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  *                                                                  *
  *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  *                                                                  *
  *                                                                  *
  ********************************************************************
  ********************************************************************
 
 
   function:
   function:
-    last mod: $Id: decinfo.c 15400 2008-10-15 12:10:58Z tterribe $
+    last mod: $Id: decinfo.c 16503 2009-08-22 18:14:02Z giles $
 
 
  ********************************************************************/
  ********************************************************************/
 
 
@@ -27,30 +27,30 @@
   _opb: The pack buffer to read the octets from.
   _opb: The pack buffer to read the octets from.
   _buf: The byte array to store the unpacked bytes in.
   _buf: The byte array to store the unpacked bytes in.
   _len: The number of octets to unpack.*/
   _len: The number of octets to unpack.*/
-static void oc_unpack_octets(oggpack_buffer *_opb,char *_buf,size_t _len){
+static void oc_unpack_octets(oc_pack_buf *_opb,char *_buf,size_t _len){
   while(_len-->0){
   while(_len-->0){
     long val;
     long val;
-    theorapackB_read(_opb,8,&val);
+    val=oc_pack_read(_opb,8);
     *_buf++=(char)val;
     *_buf++=(char)val;
   }
   }
 }
 }
 
 
 /*Unpacks a 32-bit integer encoded by octets in little-endian form.*/
 /*Unpacks a 32-bit integer encoded by octets in little-endian form.*/
-static long oc_unpack_length(oggpack_buffer *_opb){
+static long oc_unpack_length(oc_pack_buf *_opb){
   long ret[4];
   long ret[4];
   int  i;
   int  i;
-  for(i=0;i<4;i++)theorapackB_read(_opb,8,ret+i);
+  for(i=0;i<4;i++)ret[i]=oc_pack_read(_opb,8);
   return ret[0]|ret[1]<<8|ret[2]<<16|ret[3]<<24;
   return ret[0]|ret[1]<<8|ret[2]<<16|ret[3]<<24;
 }
 }
 
 
-static int oc_info_unpack(oggpack_buffer *_opb,th_info *_info){
+static int oc_info_unpack(oc_pack_buf *_opb,th_info *_info){
   long val;
   long val;
   /*Check the codec bitstream version.*/
   /*Check the codec bitstream version.*/
-  theorapackB_read(_opb,8,&val);
+  val=oc_pack_read(_opb,8);
   _info->version_major=(unsigned char)val;
   _info->version_major=(unsigned char)val;
-  theorapackB_read(_opb,8,&val);
+  val=oc_pack_read(_opb,8);
   _info->version_minor=(unsigned char)val;
   _info->version_minor=(unsigned char)val;
-  theorapackB_read(_opb,8,&val);
+  val=oc_pack_read(_opb,8);
   _info->version_subminor=(unsigned char)val;
   _info->version_subminor=(unsigned char)val;
   /*verify we can parse this bitstream version.
   /*verify we can parse this bitstream version.
      We accept earlier minors and all subminors, by spec*/
      We accept earlier minors and all subminors, by spec*/
@@ -60,25 +60,21 @@ static int oc_info_unpack(oggpack_buffer *_opb,th_info *_info){
     return TH_EVERSION;
     return TH_EVERSION;
   }
   }
   /*Read the encoded frame description.*/
   /*Read the encoded frame description.*/
-  theorapackB_read(_opb,16,&val);
+  val=oc_pack_read(_opb,16);
   _info->frame_width=(ogg_uint32_t)val<<4;
   _info->frame_width=(ogg_uint32_t)val<<4;
-  theorapackB_read(_opb,16,&val);
+  val=oc_pack_read(_opb,16);
   _info->frame_height=(ogg_uint32_t)val<<4;
   _info->frame_height=(ogg_uint32_t)val<<4;
-  theorapackB_read(_opb,24,&val);
+  val=oc_pack_read(_opb,24);
   _info->pic_width=(ogg_uint32_t)val;
   _info->pic_width=(ogg_uint32_t)val;
-  theorapackB_read(_opb,24,&val);
+  val=oc_pack_read(_opb,24);
   _info->pic_height=(ogg_uint32_t)val;
   _info->pic_height=(ogg_uint32_t)val;
-  theorapackB_read(_opb,8,&val);
+  val=oc_pack_read(_opb,8);
   _info->pic_x=(ogg_uint32_t)val;
   _info->pic_x=(ogg_uint32_t)val;
-  /*Note: The sense of pic_y is inverted in what we pass back to the
-     application compared to how it is stored in the bitstream.
-    This is because the bitstream uses a right-handed coordinate system, while
-     applications expect a left-handed one.*/
-  theorapackB_read(_opb,8,&val);
-  _info->pic_y=_info->frame_height-_info->pic_height-(ogg_uint32_t)val;
-  theorapackB_read(_opb,32,&val);
+  val=oc_pack_read(_opb,8);
+  _info->pic_y=(ogg_uint32_t)val;
+  val=oc_pack_read(_opb,32);
   _info->fps_numerator=(ogg_uint32_t)val;
   _info->fps_numerator=(ogg_uint32_t)val;
-  theorapackB_read(_opb,32,&val);
+  val=oc_pack_read(_opb,32);
   _info->fps_denominator=(ogg_uint32_t)val;
   _info->fps_denominator=(ogg_uint32_t)val;
   if(_info->frame_width==0||_info->frame_height==0||
   if(_info->frame_width==0||_info->frame_height==0||
    _info->pic_width+_info->pic_x>_info->frame_width||
    _info->pic_width+_info->pic_x>_info->frame_width||
@@ -86,38 +82,46 @@ static int oc_info_unpack(oggpack_buffer *_opb,th_info *_info){
    _info->fps_numerator==0||_info->fps_denominator==0){
    _info->fps_numerator==0||_info->fps_denominator==0){
     return TH_EBADHEADER;
     return TH_EBADHEADER;
   }
   }
-  theorapackB_read(_opb,24,&val);
+  /*Note: The sense of pic_y is inverted in what we pass back to the
+     application compared to how it is stored in the bitstream.
+    This is because the bitstream uses a right-handed coordinate system, while
+     applications expect a left-handed one.*/
+  _info->pic_y=_info->frame_height-_info->pic_height-_info->pic_y;
+  val=oc_pack_read(_opb,24);
   _info->aspect_numerator=(ogg_uint32_t)val;
   _info->aspect_numerator=(ogg_uint32_t)val;
-  theorapackB_read(_opb,24,&val);
+  val=oc_pack_read(_opb,24);
   _info->aspect_denominator=(ogg_uint32_t)val;
   _info->aspect_denominator=(ogg_uint32_t)val;
-  theorapackB_read(_opb,8,&val);
+  val=oc_pack_read(_opb,8);
   _info->colorspace=(th_colorspace)val;
   _info->colorspace=(th_colorspace)val;
-  theorapackB_read(_opb,24,&val);
+  val=oc_pack_read(_opb,24);
   _info->target_bitrate=(int)val;
   _info->target_bitrate=(int)val;
-  theorapackB_read(_opb,6,&val);
+  val=oc_pack_read(_opb,6);
   _info->quality=(int)val;
   _info->quality=(int)val;
-  theorapackB_read(_opb,5,&val);
+  val=oc_pack_read(_opb,5);
   _info->keyframe_granule_shift=(int)val;
   _info->keyframe_granule_shift=(int)val;
-  theorapackB_read(_opb,2,&val);
+  val=oc_pack_read(_opb,2);
   _info->pixel_fmt=(th_pixel_fmt)val;
   _info->pixel_fmt=(th_pixel_fmt)val;
   if(_info->pixel_fmt==TH_PF_RSVD)return TH_EBADHEADER;
   if(_info->pixel_fmt==TH_PF_RSVD)return TH_EBADHEADER;
-  if(theorapackB_read(_opb,3,&val)<0||val!=0)return TH_EBADHEADER;
+  val=oc_pack_read(_opb,3);
+  if(val!=0||oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
   return 0;
   return 0;
 }
 }
 
 
-static int oc_comment_unpack(oggpack_buffer *_opb,th_comment *_tc){
+static int oc_comment_unpack(oc_pack_buf *_opb,th_comment *_tc){
   long len;
   long len;
   int  i;
   int  i;
   /*Read the vendor string.*/
   /*Read the vendor string.*/
   len=oc_unpack_length(_opb);
   len=oc_unpack_length(_opb);
-  if(len<0||theorapackB_bytes(_opb)+len>_opb->storage)return TH_EBADHEADER;
+  if(len<0||len>oc_pack_bytes_left(_opb))return TH_EBADHEADER;
   _tc->vendor=_ogg_malloc((size_t)len+1);
   _tc->vendor=_ogg_malloc((size_t)len+1);
+  if(_tc->vendor==NULL)return TH_EFAULT;
   oc_unpack_octets(_opb,_tc->vendor,len);
   oc_unpack_octets(_opb,_tc->vendor,len);
   _tc->vendor[len]='\0';
   _tc->vendor[len]='\0';
   /*Read the user comments.*/
   /*Read the user comments.*/
   _tc->comments=(int)oc_unpack_length(_opb);
   _tc->comments=(int)oc_unpack_length(_opb);
-  if(_tc->comments<0||_tc->comments>(LONG_MAX>>2)||
-   theorapackB_bytes(_opb)+((long)_tc->comments<<2)>_opb->storage){
+  len=_tc->comments;
+  if(len<0||len>(LONG_MAX>>2)||len<<2>oc_pack_bytes_left(_opb)){
+    _tc->comments=0;
     return TH_EBADHEADER;
     return TH_EBADHEADER;
   }
   }
   _tc->comment_lengths=(int *)_ogg_malloc(
   _tc->comment_lengths=(int *)_ogg_malloc(
@@ -126,19 +130,23 @@ static int oc_comment_unpack(oggpack_buffer *_opb,th_comment *_tc){
    _tc->comments*sizeof(_tc->user_comments[0]));
    _tc->comments*sizeof(_tc->user_comments[0]));
   for(i=0;i<_tc->comments;i++){
   for(i=0;i<_tc->comments;i++){
     len=oc_unpack_length(_opb);
     len=oc_unpack_length(_opb);
-    if(len<0||theorapackB_bytes(_opb)+len>_opb->storage){
+    if(len<0||len>oc_pack_bytes_left(_opb)){
       _tc->comments=i;
       _tc->comments=i;
       return TH_EBADHEADER;
       return TH_EBADHEADER;
     }
     }
     _tc->comment_lengths[i]=len;
     _tc->comment_lengths[i]=len;
     _tc->user_comments[i]=_ogg_malloc((size_t)len+1);
     _tc->user_comments[i]=_ogg_malloc((size_t)len+1);
+    if(_tc->user_comments[i]==NULL){
+      _tc->comments=i;
+      return TH_EFAULT;
+    }
     oc_unpack_octets(_opb,_tc->user_comments[i],len);
     oc_unpack_octets(_opb,_tc->user_comments[i],len);
     _tc->user_comments[i][len]='\0';
     _tc->user_comments[i][len]='\0';
   }
   }
-  return theorapackB_read(_opb,0,&len)<0?TH_EBADHEADER:0;
+  return oc_pack_bytes_left(_opb)<0?TH_EBADHEADER:0;
 }
 }
 
 
-static int oc_setup_unpack(oggpack_buffer *_opb,th_setup_info *_setup){
+static int oc_setup_unpack(oc_pack_buf *_opb,th_setup_info *_setup){
   int ret;
   int ret;
   /*Read the quantizer tables.*/
   /*Read the quantizer tables.*/
   ret=oc_quant_params_unpack(_opb,&_setup->qinfo);
   ret=oc_quant_params_unpack(_opb,&_setup->qinfo);
@@ -152,13 +160,13 @@ static void oc_setup_clear(th_setup_info *_setup){
   oc_huff_trees_clear(_setup->huff_tables);
   oc_huff_trees_clear(_setup->huff_tables);
 }
 }
 
 
-static int oc_dec_headerin(oggpack_buffer *_opb,th_info *_info,
+static int oc_dec_headerin(oc_pack_buf *_opb,th_info *_info,
  th_comment *_tc,th_setup_info **_setup,ogg_packet *_op){
  th_comment *_tc,th_setup_info **_setup,ogg_packet *_op){
   char buffer[6];
   char buffer[6];
   long val;
   long val;
   int  packtype;
   int  packtype;
   int  ret;
   int  ret;
-  theorapackB_read(_opb,8,&val);
+  val=oc_pack_read(_opb,8);
   packtype=(int)val;
   packtype=(int)val;
   /*If we're at a data packet and we have received all three headers, we're
   /*If we're at a data packet and we have received all three headers, we're
      done.*/
      done.*/
@@ -198,6 +206,7 @@ static int oc_dec_headerin(oggpack_buffer *_opb,th_info *_info,
         return TH_EBADHEADER;
         return TH_EBADHEADER;
       }
       }
       setup=(oc_setup_info *)_ogg_calloc(1,sizeof(*setup));
       setup=(oc_setup_info *)_ogg_calloc(1,sizeof(*setup));
+      if(setup==NULL)return TH_EFAULT;
       ret=oc_setup_unpack(_opb,setup);
       ret=oc_setup_unpack(_opb,setup);
       if(ret<0){
       if(ret<0){
         oc_setup_clear(setup);
         oc_setup_clear(setup);
@@ -222,13 +231,11 @@ static int oc_dec_headerin(oggpack_buffer *_opb,th_info *_info,
    stream until it returns 0.*/
    stream until it returns 0.*/
 int th_decode_headerin(th_info *_info,th_comment *_tc,
 int th_decode_headerin(th_info *_info,th_comment *_tc,
  th_setup_info **_setup,ogg_packet *_op){
  th_setup_info **_setup,ogg_packet *_op){
-  oggpack_buffer opb;
-  int            ret;
+  oc_pack_buf opb;
   if(_op==NULL)return TH_EBADHEADER;
   if(_op==NULL)return TH_EBADHEADER;
   if(_info==NULL)return TH_EFAULT;
   if(_info==NULL)return TH_EFAULT;
-  theorapackB_readinit(&opb,_op->packet,_op->bytes);
-  ret=oc_dec_headerin(&opb,_info,_tc,_setup,_op);
-  return ret;
+  oc_pack_readinit(&opb,_op->packet,_op->bytes);
+  return oc_dec_headerin(&opb,_info,_tc,_setup,_op);
 }
 }
 
 
 void th_setup_free(th_setup_info *_setup){
 void th_setup_free(th_setup_info *_setup){

+ 30 - 18
Engine/lib/libtheora/lib/dec/decint.h → Engine/lib/libtheora/lib/decint.h

@@ -5,13 +5,13 @@
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  *                                                                  *
  *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  *                                                                  *
  *                                                                  *
  ********************************************************************
  ********************************************************************
 
 
   function:
   function:
-    last mod: $Id: decint.h 15400 2008-10-15 12:10:58Z tterribe $
+    last mod: $Id: decint.h 16503 2009-08-22 18:14:02Z giles $
 
 
  ********************************************************************/
  ********************************************************************/
 
 
@@ -19,13 +19,12 @@
 #if !defined(_decint_H)
 #if !defined(_decint_H)
 # define _decint_H (1)
 # define _decint_H (1)
 # include "theora/theoradec.h"
 # include "theora/theoradec.h"
-# include "../internal.h"
+# include "internal.h"
 # include "bitpack.h"
 # include "bitpack.h"
 
 
 typedef struct th_setup_info oc_setup_info;
 typedef struct th_setup_info oc_setup_info;
 typedef struct th_dec_ctx    oc_dec_ctx;
 typedef struct th_dec_ctx    oc_dec_ctx;
 
 
-# include "idct.h"
 # include "huffdec.h"
 # include "huffdec.h"
 # include "dequant.h"
 # include "dequant.h"
 
 
@@ -54,24 +53,20 @@ struct th_dec_ctx{
      when a frame has been processed and a data packet is ready.*/
      when a frame has been processed and a data packet is ready.*/
   int                  packet_state;
   int                  packet_state;
   /*Buffer in which to assemble packets.*/
   /*Buffer in which to assemble packets.*/
-  oggpack_buffer       opb;
+  oc_pack_buf          opb;
   /*Huffman decode trees.*/
   /*Huffman decode trees.*/
   oc_huff_node        *huff_tables[TH_NHUFFMAN_TABLES];
   oc_huff_node        *huff_tables[TH_NHUFFMAN_TABLES];
-  /*The index of one past the last token in each plane for each coefficient.
-    The final entries are the total number of tokens for each coefficient.*/
-  int                  ti0[3][64];
-  /*The index of one past the last extra bits entry in each plane for each
-     coefficient.
-    The final entries are the total number of extra bits entries for each
-     coefficient.*/
-  int                  ebi0[3][64];
+  /*The index of the first token in each plane for each coefficient.*/
+  ptrdiff_t            ti0[3][64];
   /*The number of outstanding EOB runs at the start of each coefficient in each
   /*The number of outstanding EOB runs at the start of each coefficient in each
      plane.*/
      plane.*/
-  int                  eob_runs[3][64];
+  ptrdiff_t            eob_runs[3][64];
   /*The DCT token lists.*/
   /*The DCT token lists.*/
-  unsigned char      **dct_tokens;
+  unsigned char       *dct_tokens;
   /*The extra bits associated with DCT tokens.*/
   /*The extra bits associated with DCT tokens.*/
-  ogg_uint16_t       **extra_bits;
+  unsigned char       *extra_bits;
+  /*The number of dct tokens unpacked so far.*/
+  int                  dct_tokens_count;
   /*The out-of-loop post-processing level.*/
   /*The out-of-loop post-processing level.*/
   int                  pp_level;
   int                  pp_level;
   /*The DC scale used for out-of-loop deblocking.*/
   /*The DC scale used for out-of-loop deblocking.*/
@@ -85,11 +80,28 @@ struct th_dec_ctx{
   /*The storage for the post-processed frame buffer.*/
   /*The storage for the post-processed frame buffer.*/
   unsigned char       *pp_frame_data;
   unsigned char       *pp_frame_data;
   /*Whether or not the post-processsed frame buffer has space for chroma.*/
   /*Whether or not the post-processsed frame buffer has space for chroma.*/
-  int                  pp_frame_has_chroma;
-  /*The buffer used for the post-processed frame.*/
+  int                  pp_frame_state;
+  /*The buffer used for the post-processed frame.
+    Note that this is _not_ guaranteed to have the same strides and offsets as
+     the reference frame buffers.*/
   th_ycbcr_buffer      pp_frame_buf;
   th_ycbcr_buffer      pp_frame_buf;
   /*The striped decode callback function.*/
   /*The striped decode callback function.*/
   th_stripe_callback   stripe_cb;
   th_stripe_callback   stripe_cb;
+# if defined(HAVE_CAIRO)
+  /*Output metrics for debugging.*/
+  int                  telemetry;
+  int                  telemetry_mbmode;
+  int                  telemetry_mv;
+  int                  telemetry_qi;
+  int                  telemetry_bits;
+  int                  telemetry_frame_bytes;
+  int                  telemetry_coding_bytes;
+  int                  telemetry_mode_bytes;
+  int                  telemetry_mv_bytes;
+  int                  telemetry_qi_bytes;
+  int                  telemetry_dc_bytes;
+  unsigned char       *telemetry_frame_data;
+# endif
 };
 };
 
 
 #endif
 #endif

+ 2943 - 0
Engine/lib/libtheora/lib/decode.c

@@ -0,0 +1,2943 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: decode.c 16581 2009-09-25 22:56:16Z gmaxwell $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <ogg/ogg.h>
+#include "decint.h"
+#if defined(OC_DUMP_IMAGES)
+# include <stdio.h>
+# include "png.h"
+#endif
+#if defined(HAVE_CAIRO)
+# include <cairo.h>
+#endif
+
+
+/*No post-processing.*/
+#define OC_PP_LEVEL_DISABLED  (0)
+/*Keep track of DC qi for each block only.*/
+#define OC_PP_LEVEL_TRACKDCQI (1)
+/*Deblock the luma plane.*/
+#define OC_PP_LEVEL_DEBLOCKY  (2)
+/*Dering the luma plane.*/
+#define OC_PP_LEVEL_DERINGY   (3)
+/*Stronger luma plane deringing.*/
+#define OC_PP_LEVEL_SDERINGY  (4)
+/*Deblock the chroma planes.*/
+#define OC_PP_LEVEL_DEBLOCKC  (5)
+/*Dering the chroma planes.*/
+#define OC_PP_LEVEL_DERINGC   (6)
+/*Stronger chroma plane deringing.*/
+#define OC_PP_LEVEL_SDERINGC  (7)
+/*Maximum valid post-processing level.*/
+#define OC_PP_LEVEL_MAX       (7)
+
+
+
+/*The mode alphabets for the various mode coding schemes.
+  Scheme 0 uses a custom alphabet, which is not stored in this table.
+  Each row lists all OC_NMODES macro block coding modes in the order used by
+   one scheme.
+  NOTE(review): presumably row i holds the alphabet for scheme i+1, since
+   scheme 0 is excluded; confirm against the code that indexes this table.*/
+static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={
+  /*Last MV dominates.*/
+  {
+    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
+    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  },
+  {
+    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV,
+    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  },
+  {
+    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2,
+    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  },
+  {
+    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV,
+    OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,
+    OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR
+  },
+  /*No MV dominates.*/
+  {
+    OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,
+    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  },
+  {
+    OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST,
+    OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  },
+  /*Default ordering.*/
+  {
+    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST,
+    OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  }
+};
+
+
+/*The original DCT tokens are extended and reordered during the construction of
+   the Huffman tables.
+  The extension means more bits can be read with fewer calls to the bitpacker
+   during the Huffman decoding process (at the cost of larger Huffman tables),
+   and fewer tokens require additional extra bits (reducing the average storage
+   per decoded token).
+  The revised ordering reveals essential information in the token value
+   itself; specifically, whether or not there are additional extra bits to read
+   and the parameter to which those extra bits are applied.
+  The token is used to fetch a code word from the OC_DCT_CODE_WORD table below.
+  The extra bits are added into code word at the bit position inferred from the
+   token value, giving the final code word from which all required parameters
+   are derived.
+  The number of EOBs and the leading zero run length can be extracted directly.
+  The coefficient magnitude is optionally negated before extraction, according
+   to a 'flip' bit.*/
+
+/*The number of additional extra bits that are decoded with each of the
+   internal DCT tokens.*/
+static const unsigned char OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[15]={
+  12,4,3,3,4,4,5,5,8,8,8,8,3,3,6
+};
+
+/*Whether or not an internal token needs any additional extra bits.*/
+#define OC_DCT_TOKEN_NEEDS_MORE(token) \
+ (token<(sizeof(OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)/ \
+  sizeof(*OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)))
+
+/*This token (OC_DCT_REPEAT_RUN3_TOKEN) requires more than 8 extra bits.*/
+#define OC_DCT_TOKEN_FAT_EOB (0)
+
+/*The number of EOBs to use for an end-of-frame token.
+  Note: We want to set eobs to PTRDIFF_MAX here, but that requires C99, which
+   is not yet available everywhere; this should be equivalent.*/
+#define OC_DCT_EOB_FINISH (~(size_t)0>>1)
+
+/*The location of the (6) run length bits in the code word.
+  These are placed at index 0 and given 8 bits (even though 6 would suffice)
+   because it may be faster to extract the lower byte on some platforms.*/
+#define OC_DCT_CW_RLEN_SHIFT (0)
+/*The location of the (12) EOB bits in the code word.*/
+#define OC_DCT_CW_EOB_SHIFT  (8)
+/*The location of the (1) flip bit in the code word.
+  This must be right under the magnitude bits.*/
+#define OC_DCT_CW_FLIP_BIT   (20)
+/*The location of the (11) token magnitude bits in the code word.
+  These must be last, and rely on a sign-extending right shift.*/
+#define OC_DCT_CW_MAG_SHIFT  (21)
+
+/*Pack the given fields into a code word.
+  _eobs: The EOB run length (non-negative).
+  _rlen: The zero run length (non-negative).
+  _mag:  The signed coefficient magnitude.
+  _flip: The flip bit (0 or 1); it is subtracted from the magnitude before
+          packing.
+  The magnitude field may be negative, so it is positioned with a multiply
+   instead of a left shift: left-shifting a negative value is undefined
+   behavior in C, while the multiplication yields the same bit pattern for
+   every value used in the code word table.*/
+#define OC_DCT_CW_PACK(_eobs,_rlen,_mag,_flip) \
+ ((_eobs)<<OC_DCT_CW_EOB_SHIFT| \
+ (_rlen)<<OC_DCT_CW_RLEN_SHIFT| \
+ (_flip)<<OC_DCT_CW_FLIP_BIT| \
+ ((_mag)-(_flip))*(1<<OC_DCT_CW_MAG_SHIFT))
+
+/*A special code word value that signals the end of the frame (a long EOB run
+   of zero).*/
+#define OC_DCT_CW_FINISH (0)
+
+/*The position at which to insert the extra bits in the code word.
+  We use this formulation because Intel has no useful cmov.
+  A real architecture would probably do better with two of those.
+  This translates to 11 instructions(!), and is _still_ faster than either a
+   table lookup (just barely) or the naive double-ternary implementation (which
+   gcc translates to a jump and a cmov).
+  This assumes OC_DCT_CW_RLEN_SHIFT is zero, but could easily be reworked if
+   you want to make one of the other shifts zero.*/
+#define OC_DCT_TOKEN_EB_POS(_token) \
+ ((OC_DCT_CW_EOB_SHIFT-OC_DCT_CW_MAG_SHIFT&-((_token)<2)) \
+ +(OC_DCT_CW_MAG_SHIFT&-((_token)<12)))
+
+/*The code words for each internal token.
+  See the notes at OC_DCT_TOKEN_MAP for the reasons why things are out of
+   order.*/
+static const ogg_int32_t OC_DCT_CODE_WORD[92]={
+  /*These tokens require additional extra bits for the EOB count.*/
+  /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
+  OC_DCT_CW_FINISH,
+  /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
+  OC_DCT_CW_PACK(16, 0,  0,0),
+  /*These tokens require additional extra bits for the magnitude.*/
+  /*OC_DCT_VAL_CAT5 (4 extra bits-1 already read)*/
+  OC_DCT_CW_PACK( 0, 0, 13,0),
+  OC_DCT_CW_PACK( 0, 0, 13,1),
+  /*OC_DCT_VAL_CAT6 (5 extra bits-1 already read)*/
+  OC_DCT_CW_PACK( 0, 0, 21,0),
+  OC_DCT_CW_PACK( 0, 0, 21,1),
+  /*OC_DCT_VAL_CAT7 (6 extra bits-1 already read)*/
+  OC_DCT_CW_PACK( 0, 0, 37,0),
+  OC_DCT_CW_PACK( 0, 0, 37,1),
+  /*OC_DCT_VAL_CAT8 (10 extra bits-2 already read)*/
+  OC_DCT_CW_PACK( 0, 0, 69,0),
+  OC_DCT_CW_PACK( 0, 0,325,0),
+  OC_DCT_CW_PACK( 0, 0, 69,1),
+  OC_DCT_CW_PACK( 0, 0,325,1),
+  /*These tokens require additional extra bits for the run length.*/
+  /*OC_DCT_RUN_CAT1C (4 extra bits-1 already read)*/
+  OC_DCT_CW_PACK( 0,10, +1,0),
+  OC_DCT_CW_PACK( 0,10, -1,0),
+  /*OC_DCT_ZRL_TOKEN (6 extra bits)
+    Flip is set to distinguish this from OC_DCT_CW_FINISH.*/
+  OC_DCT_CW_PACK( 0, 0,  0,1),
+  /*The remaining tokens require no additional extra bits.*/
+  /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 1, 0,  0,0),
+  /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 2, 0,  0,0),
+  /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 3, 0,  0,0),
+  /*OC_DCT_RUN_CAT1A (1 extra bit-1 already read)x5*/
+  OC_DCT_CW_PACK( 0, 1, +1,0),
+  OC_DCT_CW_PACK( 0, 1, -1,0),
+  OC_DCT_CW_PACK( 0, 2, +1,0),
+  OC_DCT_CW_PACK( 0, 2, -1,0),
+  OC_DCT_CW_PACK( 0, 3, +1,0),
+  OC_DCT_CW_PACK( 0, 3, -1,0),
+  OC_DCT_CW_PACK( 0, 4, +1,0),
+  OC_DCT_CW_PACK( 0, 4, -1,0),
+  OC_DCT_CW_PACK( 0, 5, +1,0),
+  OC_DCT_CW_PACK( 0, 5, -1,0),
+  /*OC_DCT_RUN_CAT2A (2 extra bits-2 already read)*/
+  OC_DCT_CW_PACK( 0, 1, +2,0),
+  OC_DCT_CW_PACK( 0, 1, +3,0),
+  OC_DCT_CW_PACK( 0, 1, -2,0),
+  OC_DCT_CW_PACK( 0, 1, -3,0),
+  /*OC_DCT_RUN_CAT1B (3 extra bits-3 already read)*/
+  OC_DCT_CW_PACK( 0, 6, +1,0),
+  OC_DCT_CW_PACK( 0, 7, +1,0),
+  OC_DCT_CW_PACK( 0, 8, +1,0),
+  OC_DCT_CW_PACK( 0, 9, +1,0),
+  OC_DCT_CW_PACK( 0, 6, -1,0),
+  OC_DCT_CW_PACK( 0, 7, -1,0),
+  OC_DCT_CW_PACK( 0, 8, -1,0),
+  OC_DCT_CW_PACK( 0, 9, -1,0),
+  /*OC_DCT_RUN_CAT2B (3 extra bits-3 already read)*/
+  OC_DCT_CW_PACK( 0, 2, +2,0),
+  OC_DCT_CW_PACK( 0, 3, +2,0),
+  OC_DCT_CW_PACK( 0, 2, +3,0),
+  OC_DCT_CW_PACK( 0, 3, +3,0),
+  OC_DCT_CW_PACK( 0, 2, -2,0),
+  OC_DCT_CW_PACK( 0, 3, -2,0),
+  OC_DCT_CW_PACK( 0, 2, -3,0),
+  OC_DCT_CW_PACK( 0, 3, -3,0),
+  /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits-3 already read)
+    Flip is set on the first one to distinguish it from OC_DCT_CW_FINISH.*/
+  OC_DCT_CW_PACK( 0, 0,  0,1),
+  OC_DCT_CW_PACK( 0, 1,  0,0),
+  OC_DCT_CW_PACK( 0, 2,  0,0),
+  OC_DCT_CW_PACK( 0, 3,  0,0),
+  OC_DCT_CW_PACK( 0, 4,  0,0),
+  OC_DCT_CW_PACK( 0, 5,  0,0),
+  OC_DCT_CW_PACK( 0, 6,  0,0),
+  OC_DCT_CW_PACK( 0, 7,  0,0),
+  /*OC_ONE_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 0, 0, +1,0),
+  /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 0, 0, -1,0),
+  /*OC_TWO_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 0, 0, +2,0),
+  /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 0, 0, -2,0),
+  /*OC_DCT_VAL_CAT2 (1 extra bit-1 already read)x4*/
+  OC_DCT_CW_PACK( 0, 0, +3,0),
+  OC_DCT_CW_PACK( 0, 0, -3,0),
+  OC_DCT_CW_PACK( 0, 0, +4,0),
+  OC_DCT_CW_PACK( 0, 0, -4,0),
+  OC_DCT_CW_PACK( 0, 0, +5,0),
+  OC_DCT_CW_PACK( 0, 0, -5,0),
+  OC_DCT_CW_PACK( 0, 0, +6,0),
+  OC_DCT_CW_PACK( 0, 0, -6,0),
+  /*OC_DCT_VAL_CAT3 (2 extra bits-2 already read)*/
+  OC_DCT_CW_PACK( 0, 0, +7,0),
+  OC_DCT_CW_PACK( 0, 0, +8,0),
+  OC_DCT_CW_PACK( 0, 0, -7,0),
+  OC_DCT_CW_PACK( 0, 0, -8,0),
+  /*OC_DCT_VAL_CAT4 (3 extra bits-3 already read)*/
+  OC_DCT_CW_PACK( 0, 0, +9,0),
+  OC_DCT_CW_PACK( 0, 0,+10,0),
+  OC_DCT_CW_PACK( 0, 0,+11,0),
+  OC_DCT_CW_PACK( 0, 0,+12,0),
+  OC_DCT_CW_PACK( 0, 0, -9,0),
+  OC_DCT_CW_PACK( 0, 0,-10,0),
+  OC_DCT_CW_PACK( 0, 0,-11,0),
+  OC_DCT_CW_PACK( 0, 0,-12,0),
+  /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits-3 already read)*/
+  OC_DCT_CW_PACK( 8, 0,  0,0),
+  OC_DCT_CW_PACK( 9, 0,  0,0),
+  OC_DCT_CW_PACK(10, 0,  0,0),
+  OC_DCT_CW_PACK(11, 0,  0,0),
+  OC_DCT_CW_PACK(12, 0,  0,0),
+  OC_DCT_CW_PACK(13, 0,  0,0),
+  OC_DCT_CW_PACK(14, 0,  0,0),
+  OC_DCT_CW_PACK(15, 0,  0,0),
+  /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits-2 already read)*/
+  OC_DCT_CW_PACK( 4, 0,  0,0),
+  OC_DCT_CW_PACK( 5, 0,  0,0),
+  OC_DCT_CW_PACK( 6, 0,  0,0),
+  OC_DCT_CW_PACK( 7, 0,  0,0),
+};
+
+
+
+/*Reads one super block run length from the packet.
+  The prefix is consumed in small groups of bits; each group either completes
+   a short code or selects how many suffix bits follow.
+  _opb:   The bit reader to pull the code word from.
+  Return: The decoded run length.*/
+static int oc_sb_run_unpack(oc_pack_buf *_opb){
+  long code;
+  /*Code words and the run lengths they produce:
+       Codeword            Run Length
+     0                       1
+     10x                     2-3
+     110x                    4-5
+     1110xx                  6-9
+     11110xxx                10-17
+     111110xxxx              18-33
+     111111xxxxxxxxxxxx      34-4129*/
+  if(oc_pack_read1(_opb)==0)return 1;
+  /*Two more bits: "0x" finishes the 2-3 code, "10" precedes a 1-bit suffix.*/
+  code=oc_pack_read(_opb,2);
+  if(!(code&2))return (int)code+2;
+  if(!(code&1))return (int)oc_pack_read1(_opb)+4;
+  /*Three more bits distinguish the remaining four code lengths.*/
+  code=oc_pack_read(_opb,3);
+  if(!(code&4))return (int)code+6;
+  if(!(code&2)){
+    int base;
+    /*The low bit already read is the top bit of the 3-bit suffix.*/
+    base=10+((code&1)<<2);
+    code=oc_pack_read(_opb,2);
+    return base+(int)code;
+  }
+  if(!(code&1))return (int)oc_pack_read(_opb,4)+18;
+  return (int)oc_pack_read(_opb,12)+34;
+}
+
+/*Reads one block-level run length from the packet.
+  _opb:   The bit reader to pull the code word from.
+  Return: The decoded run length.*/
+static int oc_block_run_unpack(oc_pack_buf *_opb){
+  long code;
+  long tail;
+  /*Code words and the run lengths they produce:
+     Codeword             Run Length
+     0x                      1-2
+     10x                     3-4
+     110x                    5-6
+     1110xx                  7-10
+     11110xx                 11-14
+     11111xxxx               15-30*/
+  code=oc_pack_read(_opb,2);
+  if(!(code&2))return (int)code+1;
+  if(!(code&1))return (int)oc_pack_read1(_opb)+3;
+  code=oc_pack_read(_opb,2);
+  if(!(code&2))return (int)code+5;
+  if(!(code&1))return (int)oc_pack_read(_opb,2)+7;
+  /*The top bit of this group picks between the last two code lengths; the
+     low two bits are the suffix (or its upper half).*/
+  code=oc_pack_read(_opb,3);
+  if(!(code&4))return (int)(11+code);
+  tail=oc_pack_read(_opb,2);
+  return (int)(15+((code&3)<<2)+tail);
+}
+
+
+
+/*Initializes a decoder context from the stream info and setup headers.
+  _dec:   The decoder context to initialize.
+  _info:  The stream parameters, passed through to oc_state_init().
+  _setup: The setup info supplying the Huffman tables, the quantization
+           parameters, and the loop filter limits.
+  Return: 0 on success, the negative value returned by oc_state_init() or
+           oc_huff_trees_copy() if either fails, or TH_EFAULT if the token
+           buffer cannot be allocated.
+          On failure, everything set up so far is released again.*/
+static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
+ const th_setup_info *_setup){
+  int qti;
+  int pli;
+  int qi;
+  int ret;
+  ret=oc_state_init(&_dec->state,_info,3);
+  if(ret<0)return ret;
+  ret=oc_huff_trees_copy(_dec->huff_tables,
+   (const oc_huff_node *const *)_setup->huff_tables);
+  if(ret<0){
+    oc_state_clear(&_dec->state);
+    return ret;
+  }
+  /*For each fragment, allocate one byte for every DCT coefficient token, plus
+     one byte for extra-bits for each token, plus one more byte for the long
+     EOB run, just in case it's the very last token and has a run length of
+     one.*/
+  _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)*
+   _dec->state.nfrags*sizeof(_dec->dct_tokens[0]));
+  if(_dec->dct_tokens==NULL){
+    oc_huff_trees_clear(_dec->huff_tables);
+    oc_state_clear(&_dec->state);
+    return TH_EFAULT;
+  }
+  /*Point every [qi][pli][qti] table entry at its backing storage.*/
+  for(qi=0;qi<64;qi++)for(pli=0;pli<3;pli++)for(qti=0;qti<2;qti++){
+    _dec->state.dequant_tables[qi][pli][qti]=
+     _dec->state.dequant_table_data[qi][pli][qti];
+  }
+  oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
+   &_setup->qinfo);
+  /*Derive the per-qi post-processing sharpness modifier from four
+     coefficients of every table.
+    The tables are indexed [qi][pli][qti], matching the loop above; using any
+     other index order walks past the ends of the last two dimensions and
+     reads the wrong tables.*/
+  for(qi=0;qi<64;qi++){
+    int qsum;
+    qsum=0;
+    for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
+      /*The shift applies to the whole four-term sum: luma (pli==0) counts
+         double.*/
+      qsum+=_dec->state.dequant_tables[qi][pli][qti][12]+
+       _dec->state.dequant_tables[qi][pli][qti][17]+
+       _dec->state.dequant_tables[qi][pli][qti][18]+
+       _dec->state.dequant_tables[qi][pli][qti][24]<<(pli==0);
+    }
+    _dec->pp_sharp_mod[qi]=-(qsum>>11);
+  }
+  memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
+   sizeof(_dec->state.loop_filter_limits));
+  /*Post-processing starts disabled, with none of its buffers allocated.*/
+  _dec->pp_level=OC_PP_LEVEL_DISABLED;
+  _dec->dc_qis=NULL;
+  _dec->variances=NULL;
+  _dec->pp_frame_data=NULL;
+  _dec->stripe_cb.ctx=NULL;
+  _dec->stripe_cb.stripe_decoded=NULL;
+#if defined(HAVE_CAIRO)
+  _dec->telemetry=0;
+  _dec->telemetry_bits=0;
+  _dec->telemetry_qi=0;
+  _dec->telemetry_mbmode=0;
+  _dec->telemetry_mv=0;
+  _dec->telemetry_frame_data=NULL;
+#endif
+  return 0;
+}
+
+/*Releases everything held by a decoder context: the optional telemetry
+   frame, the post-processing buffers, the DCT token buffer, the Huffman
+   trees, and finally the shared codec state.
+  The context structure itself is not freed here.
+  _dec: The decoder context to tear down.*/
+static void oc_dec_clear(oc_dec_ctx *_dec){
+#if defined(HAVE_CAIRO)
+  _ogg_free(_dec->telemetry_frame_data);
+#endif
+  _ogg_free(_dec->pp_frame_data);
+  _ogg_free(_dec->variances);
+  _ogg_free(_dec->dc_qis);
+  _ogg_free(_dec->dct_tokens);
+  oc_huff_trees_clear(_dec->huff_tables);
+  oc_state_clear(&_dec->state);
+}
+
+
+/*Parses the frame header at the start of a data packet.
+  This fills in the frame type and the list of qi values (one to three
+   entries) in the decoder state.
+  _dec:   The decoder context whose bit reader is positioned at the start of
+           the packet.
+  Return: 0 on success, TH_EBADPACKET if the packet is not a data packet, or
+           TH_EIMPL if the reserved keyframe bits are non-zero.*/
+static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
+  oc_pack_buf *opb;
+  int          nqis;
+  opb=&_dec->opb;
+  /*The leading bit must be clear for a data packet.*/
+  if(oc_pack_read1(opb)!=0)return TH_EBADPACKET;
+  /*Frame type (I or P).*/
+  _dec->state.frame_type=(int)oc_pack_read1(opb);
+  /*The qi list: a mandatory first entry, then up to two more, each one
+     announced by a set "more" bit.
+    No "more" bit follows the third entry.*/
+  _dec->state.qis[0]=(unsigned char)oc_pack_read(opb,6);
+  nqis=1;
+  while(nqis<3&&oc_pack_read1(opb)){
+    _dec->state.qis[nqis]=(unsigned char)oc_pack_read(opb,6);
+    nqis++;
+  }
+  _dec->state.nqis=nqis;
+  /*Keyframes have 3 unused configuration bits, holdovers from VP3 days.
+    Most of the other unused bits in the VP3 headers were eliminated.
+    I don't know why these remain.*/
+  /*I wanted to eliminate wasted bits, but not all config wiggle room
+     --Monty.*/
+  if(_dec->state.frame_type==OC_INTRA_FRAME&&oc_pack_read(opb,3)!=0){
+    return TH_EIMPL;
+  }
+  return 0;
+}
+
+/*Flags every fragment reachable through a valid super block quadrant as coded
+   with mode OC_MODE_INTRA, appending each one to the coded fragment list in
+   the order visited.
+  The per-plane and total coded fragment counts are updated to match.
+  The coded macro block list and the super block flags are left untouched, as
+   those are not used when decoding INTRA frames.
+  NOTE(review): the original notes say this also clears the uncoded fragment
+   list; nothing here writes to that list directly, so presumably that follows
+   from the counts stored below; confirm against the state layout.*/
+static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){
+  const oc_sb_map   *sb_maps;
+  const oc_sb_flags *sb_flags;
+  oc_fragment       *frags;
+  ptrdiff_t         *coded_fragis;
+  ptrdiff_t          ncoded;
+  ptrdiff_t          plane_start;
+  unsigned           sb_end;
+  unsigned           sbi;
+  int                pli;
+  coded_fragis=_dec->state.coded_fragis;
+  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
+  sb_flags=_dec->state.sb_flags;
+  frags=_dec->state.frags;
+  ncoded=0;
+  sbi=0;
+  sb_end=0;
+  for(pli=0;pli<3;pli++){
+    plane_start=ncoded;
+    /*Super blocks are numbered consecutively across the planes.*/
+    sb_end+=_dec->state.fplanes[pli].nsbs;
+    while(sbi<sb_end){
+      int quadi;
+      for(quadi=0;quadi<4;quadi++){
+        int bi;
+        if(!(sb_flags[sbi].quad_valid&1<<quadi))continue;
+        for(bi=0;bi<4;bi++){
+          ptrdiff_t fragi;
+          fragi=sb_maps[sbi][quadi][bi];
+          /*Negative entries mark positions with no fragment.*/
+          if(fragi<0)continue;
+          frags[fragi].coded=1;
+          frags[fragi].mb_mode=OC_MODE_INTRA;
+          coded_fragis[ncoded]=fragi;
+          ncoded++;
+        }
+      }
+      sbi++;
+    }
+    _dec->state.ncoded_fragis[pli]=ncoded-plane_start;
+  }
+  _dec->state.ntotal_coded_fragis=ncoded;
+}
+
+/*Reads the run-length coded "partially coded" flag for every super block.
+  Each super block also has its coded_fully flag reset to 0.
+  Return: The number of super blocks flagged as partially coded.*/
+static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
+  oc_sb_flags *sb_flags;
+  unsigned     nsbs;
+  unsigned     sbi;
+  unsigned     npartial;
+  int          flag;
+  sb_flags=_dec->state.sb_flags;
+  nsbs=_dec->state.nsbs;
+  npartial=0;
+  /*The value of the first run is sent explicitly.*/
+  flag=(int)oc_pack_read1(&_dec->opb);
+  for(sbi=0;sbi<nsbs;){
+    unsigned run_left;
+    int      is_max_run;
+    run_left=oc_sb_run_unpack(&_dec->opb);
+    is_max_run=run_left>=4129;
+    /*Apply the current flag to the run, stopping early at the last super
+       block.*/
+    do{
+      sb_flags[sbi].coded_partially=flag;
+      sb_flags[sbi].coded_fully=0;
+      npartial+=flag;
+      sbi++;
+      run_left--;
+    }
+    while(run_left>0&&sbi<nsbs);
+    /*After a maximum-length run the next value is sent explicitly; otherwise
+       the value simply toggles.*/
+    if(is_max_run&&sbi<nsbs)flag=(int)oc_pack_read1(&_dec->opb);
+    else flag=!flag;
+  }
+  /*TODO: The final run should be used up exactly here.
+    If it's not, we should issue a warning of some kind.*/
+  return npartial;
+}
+
+/*Decodes the bit flags for whether or not each non-partially-coded super
+   block is fully coded or not.
+  This function should only be called if there is at least one
+   non-partially-coded super block: the initial skip loop below has no bounds
+   check and relies on finding one.
+  Nothing is returned; the result is stored in the coded_fully flag of each
+   non-partially-coded super block.*/
+static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
+  oc_sb_flags *sb_flags;
+  unsigned     nsbs;
+  unsigned     sbi;
+  unsigned     run_count;
+  long         val;
+  int          flag;
+  sb_flags=_dec->state.sb_flags;
+  nsbs=_dec->state.nsbs;
+  /*Skip partially coded super blocks.*/
+  for(sbi=0;sb_flags[sbi].coded_partially;sbi++);
+  /*The value of the first run is sent explicitly.*/
+  val=oc_pack_read1(&_dec->opb);
+  flag=(int)val;
+  do{
+    int full_run;
+    run_count=oc_sb_run_unpack(&_dec->opb);
+    full_run=run_count>=4129;
+    for(;sbi<nsbs;sbi++){
+      /*Partially coded super blocks do not consume any of the run.*/
+      if(sb_flags[sbi].coded_partially)continue;
+      /*run_count is unsigned, so this test fires exactly when the run is
+         used up; the wrapped value left by the decrement is overwritten
+         before it is read again.*/
+      if(run_count--<=0)break;
+      sb_flags[sbi].coded_fully=flag;
+    }
+    /*After a maximum-length run the next value is sent explicitly; otherwise
+       the value simply toggles.*/
+    if(full_run&&sbi<nsbs){
+      val=oc_pack_read1(&_dec->opb);
+      flag=(int)val;
+    }
+    else flag=!flag;
+  }
+  while(sbi<nsbs);
+  /*TODO: run_count should be 0 here.
+    If it's not, we should issue a warning of some kind.*/
+}
+
+/*Unpacks the coded/not coded flag of every fragment in the frame.
+  The super block flags are unpacked first; fragments in a fully coded super
+   block are coded, fragments in a super block that is neither fully nor
+   partially coded are not, and fragments in a partially coded super block
+   take their flag from run-length coded block runs.
+  Coded fragment indices are appended to the front of
+   _dec->state.coded_fragis, and uncoded ones are stored backwards from
+   _dec->state.coded_fragis+_dec->state.nfrags.
+  _dec: The decoder context; also receives the per-plane and total coded
+         fragment counts.*/
+static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
+  const oc_sb_map   *maps;
+  const oc_sb_flags *flags;
+  oc_fragment       *frags;
+  ptrdiff_t         *coded_list;
+  ptrdiff_t         *uncoded_end;
+  ptrdiff_t          ncoded;
+  ptrdiff_t          nuncoded;
+  ptrdiff_t          plane_start;
+  unsigned           sb_end;
+  unsigned           sb;
+  unsigned           npartial;
+  int                plane;
+  int                bit;
+  int                run;
+  npartial=oc_dec_partial_sb_flags_unpack(_dec);
+  if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
+  /*The stored value is inverted because it is toggled again before the first
+     block run is used.*/
+  bit=0;
+  if(npartial>0)bit=!(int)oc_pack_read1(&_dec->opb);
+  maps=(const oc_sb_map *)_dec->state.sb_maps;
+  flags=_dec->state.sb_flags;
+  frags=_dec->state.frags;
+  coded_list=_dec->state.coded_fragis;
+  uncoded_end=coded_list+_dec->state.nfrags;
+  ncoded=0;
+  nuncoded=0;
+  plane_start=0;
+  run=0;
+  sb=0;
+  sb_end=0;
+  for(plane=0;plane<3;plane++){
+    sb_end+=_dec->state.fplanes[plane].nsbs;
+    for(;sb<sb_end;sb++){
+      int quad;
+      for(quad=0;quad<4;quad++){
+        int blk;
+        if(!(flags[sb].quad_valid&1<<quad))continue;
+        for(blk=0;blk<4;blk++){
+          ptrdiff_t fragi;
+          int       is_coded;
+          fragi=maps[sb][quad][blk];
+          /*Negative map entries do not correspond to a fragment.*/
+          if(fragi<0)continue;
+          if(flags[sb].coded_fully)is_coded=1;
+          else if(!flags[sb].coded_partially)is_coded=0;
+          else{
+            /*Start a new block run, toggling the flag, when the current one
+               is exhausted.*/
+            if(run<=0){
+              run=oc_block_run_unpack(&_dec->opb);
+              bit=!bit;
+            }
+            run--;
+            is_coded=bit;
+          }
+          if(is_coded)coded_list[ncoded++]=fragi;
+          else{
+            nuncoded++;
+            uncoded_end[-nuncoded]=fragi;
+          }
+          frags[fragi].coded=is_coded;
+        }
+      }
+    }
+    _dec->state.ncoded_fragis[plane]=ncoded-plane_start;
+    plane_start=ncoded;
+  }
+  _dec->state.ntotal_coded_fragis=ncoded;
+  /*TODO: The final block run should be exactly used up at this point.
+    If it's not, we should issue a warning of some kind.*/
+}
+
+
+/*Signature shared by the macro block mode index readers: takes the pack
+   buffer to read from and returns the decoded index as an int.*/
+typedef int (*oc_mode_unpack_func)(oc_pack_buf *_opb);
+
+/*Reads a variable-length mode index: counts consecutive non-zero bits,
+   stopping at the first zero bit or once 7 bits have been read.
+  _opb: The pack buffer to read from.
+  Return: The number of non-zero bits read before stopping (0...7).*/
+static int oc_vlc_mode_unpack(oc_pack_buf *_opb){
+  int nones;
+  nones=0;
+  while(nones<7&&oc_pack_read1(_opb))nones++;
+  return nones;
+}
+
+/*Reads a mode index stored as a plain 3-bit value.
+  _opb: The pack buffer to read from.
+  Return: The 3-bit value read.*/
+static int oc_clc_mode_unpack(oc_pack_buf *_opb){
+  return (int)oc_pack_read(_opb,3);
+}
+
+/*Unpacks the list of macro block modes for INTER frames.
+  A 3-bit scheme number selects how decoded indices map to modes: scheme 0
+   reads a custom mapping from the packet, and any other scheme uses
+   OC_MODE_ALPHABETS[scheme-1].
+  Scheme 7 reads each index as a plain 3-bit value; every other scheme uses
+   the variable-length reader.
+  _dec: The decoder context; reads _dec->opb and writes _dec->state.mb_modes.*/
+static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
+  const oc_mb_map     *maps;
+  signed char         *modes;
+  const oc_fragment   *frags;
+  const unsigned char *table;
+  unsigned char        custom[8];
+  oc_mode_unpack_func  read_index;
+  size_t               total;
+  size_t               mb;
+  int                  scheme;
+  scheme=(int)oc_pack_read(&_dec->opb,3);
+  if(scheme!=0)table=OC_MODE_ALPHABETS[scheme-1];
+  else{
+    int rank;
+    /*Pre-fill every slot with a usable mode.
+      A stream that fails to list each index exactly once is probably corrupt,
+       but this way no slot is ever left uninitialized, so we still decode
+       SOMETHING instead of crashing.*/
+    /*LOOP VECTORIZES*/
+    for(rank=0;rank<OC_NMODES;rank++)custom[rank]=OC_MODE_INTER_NOMV;
+    for(rank=0;rank<OC_NMODES;rank++){
+      long slot;
+      slot=oc_pack_read(&_dec->opb,3);
+      custom[slot]=OC_MODE_ALPHABETS[6][rank];
+    }
+    table=custom;
+  }
+  read_index=scheme==7?oc_clc_mode_unpack:oc_vlc_mode_unpack;
+  modes=_dec->state.mb_modes;
+  maps=(const oc_mb_map *)_dec->state.mb_maps;
+  total=_dec->state.nmbs;
+  frags=_dec->state.frags;
+  for(mb=0;mb<total;mb++){
+    int blk;
+    if(modes[mb]==OC_MODE_INVALID)continue;
+    /*Look for the first coded block among the four in plane 0.*/
+    blk=0;
+    while(blk<4&&!frags[maps[mb][0][blk]].coded)blk++;
+    /*With none coded, INTER_NOMV is forced and nothing is read.*/
+    if(blk>=4)modes[mb]=OC_MODE_INTER_NOMV;
+    /*Otherwise a mode index is decoded and mapped through the table.*/
+    else modes[mb]=table[(*read_index)(&_dec->opb)];
+  }
+}
+
+
+/*Signature shared by the motion vector component readers: takes the pack
+   buffer to read from and returns one decoded component as an int.*/
+typedef int (*oc_mv_comp_unpack_func)(oc_pack_buf *_opb);
+
+/*Reads one motion vector component stored with a variable-length code.
+  A 3-bit prefix selects the form: 0, 1 and 2 map directly to 0, 1 and -1;
+   3 and 4 give a magnitude of 2 or 3 followed by a sign bit; larger prefixes
+   are followed by (prefix-2) bits holding a magnitude offset and, in the
+   lowest bit, the sign.
+  _opb: The pack buffer to read from.
+  Return: The signed component value.*/
+static int oc_vlc_mv_comp_unpack(oc_pack_buf *_opb){
+  long prefix;
+  long extra;
+  int  mag;
+  int  sign;
+  prefix=oc_pack_read(_opb,3);
+  if(prefix==0)return 0;
+  if(prefix==1)return 1;
+  if(prefix==2)return -1;
+  if(prefix==3||prefix==4){
+    mag=(int)(prefix-1);
+    extra=oc_pack_read1(_opb);
+  }
+  else{
+    /*Prefixes 5, 6 and 7.*/
+    mag=1<<(prefix-3);
+    extra=oc_pack_read(_opb,prefix-2);
+    mag+=(int)(extra>>1);
+    extra&=1;
+  }
+  /*sign is 0 or -1, so the add and XOR conditionally negate mag.*/
+  sign=-(int)extra;
+  return (mag+sign)^sign;
+}
+
+/*Reads one motion vector component stored as a fixed 6-bit value: the upper
+   five bits are the magnitude and the lowest bit is the sign.
+  _opb: The pack buffer to read from.
+  Return: The signed component value.*/
+static int oc_clc_mv_comp_unpack(oc_pack_buf *_opb){
+  int raw;
+  int sign;
+  raw=(int)oc_pack_read(_opb,6);
+  /*sign is 0 or -1, so the add and XOR conditionally negate the magnitude.*/
+  sign=-(raw&1);
+  return ((raw>>1)+sign)^sign;
+}
+
+/*Unpacks the list of motion vectors for INTER frames, and propagates the macro
+   block modes and motion vectors to the individual fragments.
+  A single bit read from the packet selects the component reader:
+   oc_clc_mv_comp_unpack if it is set, oc_vlc_mv_comp_unpack otherwise.
+  Macro blocks whose mode is OC_MODE_INVALID, or that contain no coded
+   fragments, are skipped without reading anything.
+  _dec: The decoder context; reads its pack buffer, macro block modes and
+         maps, and writes the mb_mode of each coded fragment along with its
+         entry in _dec->state.frag_mvs.*/
+static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
+  const oc_mb_map        *mb_maps;
+  const signed char      *mb_modes;
+  oc_set_chroma_mvs_func  set_chroma_mvs;
+  oc_mv_comp_unpack_func  mv_comp_unpack;
+  oc_fragment            *frags;
+  oc_mv                  *frag_mvs;
+  const unsigned char    *map_idxs;
+  int                     map_nidxs;
+  oc_mv                   last_mv[2];/*[0]: most recent MV; [1]: the one before.*/
+  oc_mv                   cbmvs[4];
+  size_t                  nmbs;
+  size_t                  mbi;
+  long                    val;
+  set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
+  val=oc_pack_read1(&_dec->opb);
+  mv_comp_unpack=val?oc_clc_mv_comp_unpack:oc_vlc_mv_comp_unpack;
+  map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
+  map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
+  memset(last_mv,0,sizeof(last_mv));
+  frags=_dec->state.frags;
+  frag_mvs=_dec->state.frag_mvs;
+  mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
+  mb_modes=_dec->state.mb_modes;
+  nmbs=_dec->state.nmbs;
+  for(mbi=0;mbi<nmbs;mbi++){
+    int          mb_mode;
+    mb_mode=mb_modes[mbi];
+    if(mb_mode!=OC_MODE_INVALID){
+      oc_mv        mbmv;
+      ptrdiff_t    fragi;
+      int          coded[13];/*NOTE(review): presumably sized for the largest map_nidxs plus the -1 sentinel; confirm.*/
+      int          codedi;
+      int          ncoded;
+      int          mapi;
+      int          mapii;
+      /*Search for at least one coded fragment.
+        The map index of every coded fragment in this macro block is collected
+         in coded[], in map_idxs order.*/
+      ncoded=mapii=0;
+      do{
+        mapi=map_idxs[mapii];
+        fragi=mb_maps[mbi][mapi>>2][mapi&3];
+        if(frags[fragi].coded)coded[ncoded++]=mapi;
+      }
+      while(++mapii<map_nidxs);
+      if(ncoded<=0)continue;/*Nothing coded: no MVs are read for this MB.*/
+      switch(mb_mode){
+        case OC_MODE_INTER_MV_FOUR:{
+          oc_mv       lbmvs[4];
+          int         bi;
+          /*Mark the tail of the list, so we don't accidentally go past it.*/
+          coded[ncoded]=-1;
+          for(bi=codedi=0;bi<4;bi++){
+            if(coded[codedi]==bi){
+              codedi++;
+              fragi=mb_maps[mbi][0][bi];
+              frags[fragi].mb_mode=mb_mode;
+              lbmvs[bi][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+              lbmvs[bi][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+              memcpy(frag_mvs[fragi],lbmvs[bi],sizeof(lbmvs[bi]));
+            }
+            else lbmvs[bi][0]=lbmvs[bi][1]=0;/*No MV is read for an uncoded block.*/
+          }
+          if(codedi>0){/*The last MV read above becomes the most recent MV.*/
+            memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
+            memcpy(last_mv[0],lbmvs[coded[codedi-1]],sizeof(last_mv[0]));
+          }
+          if(codedi<ncoded){/*Remaining entries take MVs derived by set_chroma_mvs.*/
+            (*set_chroma_mvs)(cbmvs,(const oc_mv *)lbmvs);
+            for(;codedi<ncoded;codedi++){
+              mapi=coded[codedi];
+              bi=mapi&3;
+              fragi=mb_maps[mbi][mapi>>2][bi];
+              frags[fragi].mb_mode=mb_mode;
+              memcpy(frag_mvs[fragi],cbmvs[bi],sizeof(cbmvs[bi]));
+            }
+          }
+        }break;
+        case OC_MODE_INTER_MV:{/*Reads a new MV and records it as the most recent.*/
+          memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
+          mbmv[0]=last_mv[0][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+          mbmv[1]=last_mv[0][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+        }break;
+        case OC_MODE_INTER_MV_LAST:memcpy(mbmv,last_mv[0],sizeof(mbmv));break;
+        case OC_MODE_INTER_MV_LAST2:{/*Reuses last_mv[1] and swaps the two entries.*/
+          memcpy(mbmv,last_mv[1],sizeof(mbmv));
+          memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
+          memcpy(last_mv[0],mbmv,sizeof(last_mv[0]));
+        }break;
+        case OC_MODE_GOLDEN_MV:{/*Reads a new MV but leaves last_mv untouched.*/
+          mbmv[0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+          mbmv[1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+        }break;
+        default:memset(mbmv,0,sizeof(mbmv));break;/*All other modes use a zero MV.*/
+      }
+      /*4MV mode fills in the fragments itself.
+        For all other modes we can use this common code.*/
+      if(mb_mode!=OC_MODE_INTER_MV_FOUR){
+        for(codedi=0;codedi<ncoded;codedi++){
+          mapi=coded[codedi];
+          fragi=mb_maps[mbi][mapi>>2][mapi&3];
+          frags[fragi].mb_mode=mb_mode;
+          memcpy(frag_mvs[fragi],mbmv,sizeof(mbmv));
+        }
+      }
+    }
+  }
+}
+
+/*Unpacks the qii value (an index into the frame's list of qi values) for
+   every coded fragment.
+  With a single qi value in the frame, every coded fragment gets a qii of 0
+   and nothing is read from the packet.
+  Otherwise up to two passes of the binary run-length scheme used for the
+   super block flags are made: the first sets each qii to 0 or 1, and the
+   second, made only when the frame has 3 qi values and at least one qii is
+   non-zero, adds a further 0 or 1 to every non-zero qii.
+  _dec: The decoder context; reads _dec->opb and writes the qii field of the
+         fragments listed in _dec->state.coded_fragis.*/
+static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
+  oc_fragment     *frags;
+  const ptrdiff_t *list;
+  ptrdiff_t        total;
+  ptrdiff_t        pos;
+  int              bit;
+  int              run;
+  int              nonzero;
+  total=_dec->state.ntotal_coded_fragis;
+  if(total<=0)return;
+  frags=_dec->state.frags;
+  list=_dec->state.coded_fragis;
+  /*Only one qi value: every coded fragment uses it.*/
+  if(_dec->state.nqis==1){
+    for(pos=0;pos<total;pos++)frags[list[pos]].qii=0;
+    return;
+  }
+  /*First pass: split the fragments into qii==0 and qii>0.*/
+  bit=(int)oc_pack_read1(&_dec->opb);
+  nonzero=0;
+  pos=0;
+  while(pos<total){
+    int maxed;
+    run=oc_sb_run_unpack(&_dec->opb);
+    maxed=run>=4129;
+    /*Each run marks at least one fragment.*/
+    do{
+      frags[list[pos]].qii=bit;
+      pos++;
+      nonzero+=bit;
+      run--;
+    }
+    while(run>0&&pos<total);
+    if(maxed&&pos<total)bit=(int)oc_pack_read1(&_dec->opb);
+    else bit=!bit;
+  }
+  /*TODO: The final run should be exactly used up at this point.
+    If it's not, we should issue a warning of some kind.*/
+  /*The second pass is only needed to tell a qii of 1 from a qii of 2.*/
+  if(_dec->state.nqis!=3||nonzero<=0)return;
+  /*Advance to the first fragment with a non-zero qii.*/
+  pos=0;
+  while(frags[list[pos]].qii==0)pos++;
+  bit=(int)oc_pack_read1(&_dec->opb);
+  do{
+    int maxed;
+    run=oc_sb_run_unpack(&_dec->opb);
+    maxed=run>=4129;
+    while(pos<total){
+      ptrdiff_t fragi;
+      fragi=list[pos];
+      /*Fragments with a qii of 0 do not consume any of the run.*/
+      if(frags[fragi].qii!=0){
+        if(run<=0)break;
+        run--;
+        frags[fragi].qii+=bit;
+      }
+      pos++;
+    }
+    if(maxed&&pos<total)bit=(int)oc_pack_read1(&_dec->opb);
+    else bit=!bit;
+  }
+  while(pos<total);
+  /*TODO: The final run should be exactly used up at this point.
+    If it's not, we should issue a warning of some kind.*/
+}
+
+
+
+/*Unpacks the DC coefficient tokens.
+  Unlike when unpacking the AC coefficient tokens, we actually need to decode
+   the DC coefficient values now so that we can do DC prediction.
+  Each token byte (and any extra-bits bytes) is appended to _dec->dct_tokens,
+   and the decoded DC value is stored in the dc field of each coded fragment.
+  _dec:        The decoder context.
+  _huff_idxs:  The index of the Huffman table to use for each color plane:
+                entry 0 is used for plane 0 and entry 1 for planes 1 and 2.
+  _ntoks_left: The number of tokens left to be decoded in each color plane for
+                each coefficient.
+               This is updated as EOB tokens and zero run tokens are decoded.
+  Return: The length of any outstanding EOB run.*/
+static ptrdiff_t oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[2],
+ ptrdiff_t _ntoks_left[3][64]){
+  unsigned char   *dct_tokens;
+  oc_fragment     *frags;
+  const ptrdiff_t *coded_fragis;
+  ptrdiff_t        ncoded_fragis;
+  ptrdiff_t        fragii;
+  ptrdiff_t        eobs;
+  ptrdiff_t        ti;
+  int              pli;
+  dct_tokens=_dec->dct_tokens;
+  frags=_dec->state.frags;
+  coded_fragis=_dec->state.coded_fragis;
+  ncoded_fragis=fragii=eobs=ti=0;
+  for(pli=0;pli<3;pli++){
+    ptrdiff_t run_counts[64];
+    ptrdiff_t eob_count;
+    ptrdiff_t eobi;
+    int       rli;
+    ncoded_fragis+=_dec->state.ncoded_fragis[pli];/*Now the end of this plane's fragments in coded_fragis.*/
+    memset(run_counts,0,sizeof(run_counts));
+    _dec->eob_runs[pli][0]=eobs;/*Record the EOB run and token offset this plane starts with.*/
+    _dec->ti0[pli][0]=ti;
+    /*Continue any previous EOB run, if there was one.
+      The run is clamped to the fragments left in this plane, and each
+       fragment it covers gets a DC value of 0.*/
+    eobi=eobs;
+    if(ncoded_fragis-fragii<eobi)eobi=ncoded_fragis-fragii;
+    eob_count=eobi;
+    eobs-=eobi;
+    while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
+    while(fragii<ncoded_fragis){
+      int token;
+      int cw;
+      int eb;
+      int skip;
+      token=oc_huff_token_decode(&_dec->opb,
+       _dec->huff_tables[_huff_idxs[pli+1>>1]]);
+      dct_tokens[ti++]=(unsigned char)token;
+      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
+        eb=(int)oc_pack_read(&_dec->opb,
+         OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
+        dct_tokens[ti++]=(unsigned char)eb;
+        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);/*This token stores a second extra-bits byte.*/
+        eb<<=OC_DCT_TOKEN_EB_POS(token);
+      }
+      else eb=0;
+      cw=OC_DCT_CODE_WORD[token]+eb;
+      eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
+      if(cw==OC_DCT_CW_FINISH)eobs=OC_DCT_EOB_FINISH;
+      if(eobs){
+        eobi=OC_MINI(eobs,ncoded_fragis-fragii);/*Clamp the run to this plane; the rest carries over.*/
+        eob_count+=eobi;
+        eobs-=eobi;
+        while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
+      }
+      else{
+        int coeff;
+        skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
+        cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
+        coeff=cw>>OC_DCT_CW_MAG_SHIFT;
+        if(skip)coeff=0;/*A non-zero run length means the DC value itself is 0.*/
+        run_counts[skip]++;
+        frags[coded_fragis[fragii++]].dc=coeff;
+      }
+    }
+    /*Add the total EOB count to the longest run length.*/
+    run_counts[63]+=eob_count;
+    /*And convert the run_counts array to a moment table.*/
+    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
+    /*Finally, subtract off the number of coefficients that have been
+       accounted for by runs started in this coefficient.*/
+    for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli];
+  }
+  _dec->dct_tokens_count=ti;
+  return eobs;
+}
+
+/*Unpacks the AC coefficient tokens.
+  This can completely discard coefficient values while unpacking, and so is
+   somewhat simpler than unpacking the DC coefficient tokens.
+  Each token byte (and any extra-bits bytes) is appended to _dec->dct_tokens,
+   starting at _dec->dct_tokens_count.
+  _dec:        The decoder context.
+  _zzi:        The index of the coefficient whose tokens are being unpacked
+                (presumably in zig-zag order; confirm against the callers).
+  _huff_idxs:  The index of the Huffman table to use for each color plane:
+                entry 0 is used for plane 0 and entry 1 for planes 1 and 2.
+  _ntoks_left: The number of tokens left to be decoded in each color plane for
+                each coefficient.
+               This is updated as EOB tokens and zero run tokens are decoded.
+  _eobs:       The length of any outstanding EOB run from previous
+                coefficients.
+  Return: The length of any outstanding EOB run.*/
+static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[2],
+ ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){
+  unsigned char *dct_tokens;
+  ptrdiff_t      ti;
+  int            pli;
+  dct_tokens=_dec->dct_tokens;
+  ti=_dec->dct_tokens_count;
+  for(pli=0;pli<3;pli++){
+    ptrdiff_t run_counts[64];
+    ptrdiff_t eob_count;
+    size_t    ntoks_left;
+    size_t    ntoks;
+    int       rli;
+    _dec->eob_runs[pli][_zzi]=_eobs;/*Record the EOB run and token offset this plane starts with.*/
+    _dec->ti0[pli][_zzi]=ti;
+    ntoks_left=_ntoks_left[pli][_zzi];
+    memset(run_counts,0,sizeof(run_counts));
+    eob_count=0;
+    ntoks=0;
+    while(ntoks+_eobs<ntoks_left){/*Loop until tokens plus the pending EOB run cover this coefficient.*/
+      int token;
+      int cw;
+      int eb;
+      int skip;
+      ntoks+=_eobs;
+      eob_count+=_eobs;
+      token=oc_huff_token_decode(&_dec->opb,
+       _dec->huff_tables[_huff_idxs[pli+1>>1]]);
+      dct_tokens[ti++]=(unsigned char)token;
+      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
+        eb=(int)oc_pack_read(&_dec->opb,
+         OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
+        dct_tokens[ti++]=(unsigned char)eb;
+        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);/*This token stores a second extra-bits byte.*/
+        eb<<=OC_DCT_TOKEN_EB_POS(token);
+      }
+      else eb=0;
+      cw=OC_DCT_CODE_WORD[token]+eb;
+      skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
+      _eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
+      if(cw==OC_DCT_CW_FINISH)_eobs=OC_DCT_EOB_FINISH;
+      if(_eobs==0){/*Not an EOB token: count it under its run length.*/
+        run_counts[skip]++;
+        ntoks++;
+      }
+    }
+    /*Add the portion of the last EOB run actually used by this coefficient.*/
+    eob_count+=ntoks_left-ntoks;
+    /*And remove it from the remaining EOB count.*/
+    _eobs-=ntoks_left-ntoks;
+    /*Add the total EOB count to the longest run length.*/
+    run_counts[63]+=eob_count;
+    /*And convert the run_counts array to a moment table.*/
+    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
+    /*Finally, subtract off the number of coefficients that have been
+       accounted for by runs started in this coefficient.*/
+    for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli];
+  }
+  _dec->dct_tokens_count=ti;
+  return _eobs;
+}
+
+/*Tokens describing the DCT coefficients that belong to each fragment are
+   stored in the bitstream grouped by coefficient, not by fragment.
+
+  This means that we either decode all the tokens in order, building up a
+   separate coefficient list for each fragment as we go, and then go back and
+   do the iDCT on each fragment, or we have to create separate lists of tokens
+   for each coefficient, so that we can pull the next token required off the
+   head of the appropriate list when decoding a specific fragment.
+
+  The former was VP3's choice, and it meant 2*w*h extra storage for all the
+   decoded coefficient values.
+
+  We take the second option, which lets us store just one to three bytes per
+   token (generally far fewer than the number of coefficients, due to EOB
+   tokens and zero runs), and which requires us to only maintain a counter for
+   each of the 64 coefficients, instead of a counter for every fragment to
+   determine where the next token goes.
+
+  We actually use 3 counters per coefficient, one for each color plane, so we
+   can decode all color planes simultaneously.
+  This lets color conversion, etc., be done as soon as a full MCU (one or
+   two super block rows) is decoded, while the image data is still in cache.*/
+
+/*Unpacks the DCT tokens for all 64 coefficients of every color plane.
+  The DC tokens are unpacked first, using a pair of 4-bit Huffman table
+   indices read from the packet.
+  A second pair of indices is then read for the AC tokens; it is advanced by
+   16 for each of the four AC coefficient groups, which end at coefficients
+   6, 15, 28 and 64.
+  _dec: The decoder context.*/
+static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){
+  static const unsigned char GROUP_END[5]={1,6,15,28,64};
+  ptrdiff_t remaining[3][64];
+  ptrdiff_t eob_run;
+  int       tables[2];
+  int       plane;
+  int       coeff;
+  int       group;
+  /*Every coefficient starts out needing one token per coded fragment.*/
+  for(plane=0;plane<3;plane++){
+    ptrdiff_t ncoded;
+    ncoded=_dec->state.ncoded_fragis[plane];
+    for(coeff=0;coeff<64;coeff++)remaining[plane][coeff]=ncoded;
+  }
+  tables[0]=(int)oc_pack_read(&_dec->opb,4);
+  tables[1]=(int)oc_pack_read(&_dec->opb,4);
+  _dec->eob_runs[0][0]=0;
+  eob_run=oc_dec_dc_coeff_unpack(_dec,tables,remaining);
+#if defined(HAVE_CAIRO)
+  _dec->telemetry_dc_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+  tables[0]=(int)oc_pack_read(&_dec->opb,4);
+  tables[1]=(int)oc_pack_read(&_dec->opb,4);
+  coeff=1;
+  for(group=1;group<5;group++){
+    int last;
+    tables[0]+=16;
+    tables[1]+=16;
+    last=GROUP_END[group];
+    while(coeff<last){
+      eob_run=oc_dec_ac_coeff_unpack(_dec,coeff,tables,remaining,eob_run);
+      coeff++;
+    }
+  }
+  /*TODO: eob_run should be exactly zero, or 4096 or greater.
+    The second case occurs when an EOB run of size zero is encountered, which
+     gets treated as an infinite EOB run (where infinity is PTRDIFF_MAX).
+    If neither of these conditions holds, then a warning should be issued.*/
+}
+
+/*Allocates, frees, and updates the buffers used for post-processing so that
+   they match the current _dec->pp_level.
+  _dec: The decoder context.
+  Return: 0 if the post-processing frame buffers were set up for this frame,
+           or 1 if the function returned early: post-processing is disabled,
+           only DC quantization indices are being tracked, no indices have
+           been tracked yet and this is not an intra frame, or an allocation
+           failed.*/
+static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
+  /*pp_level 0: disabled; free any memory used and return*/
+  if(_dec->pp_level<=OC_PP_LEVEL_DISABLED){
+    if(_dec->dc_qis!=NULL){
+      _ogg_free(_dec->dc_qis);
+      _dec->dc_qis=NULL;
+      _ogg_free(_dec->variances);
+      _dec->variances=NULL;
+      _ogg_free(_dec->pp_frame_data);
+      _dec->pp_frame_data=NULL;
+    }
+    return 1;
+  }
+  if(_dec->dc_qis==NULL){
+    /*If we haven't been tracking DC quantization indices, there's no point in
+       starting now.*/
+    if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1;
+    _dec->dc_qis=(unsigned char *)_ogg_malloc(
+     _dec->state.nfrags*sizeof(_dec->dc_qis[0]));
+    if(_dec->dc_qis==NULL)return 1;
+    memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags);/*Every fragment starts at the frame's first qi.*/
+  }
+  else{
+    unsigned char   *dc_qis;
+    const ptrdiff_t *coded_fragis;
+    ptrdiff_t        ncoded_fragis;
+    ptrdiff_t        fragii;
+    unsigned char    qi0;
+    /*Update the DC quantization index of each coded block.*/
+    dc_qis=_dec->dc_qis;
+    coded_fragis=_dec->state.coded_fragis;
+    ncoded_fragis=_dec->state.ncoded_fragis[0]+
+     _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
+    qi0=(unsigned char)_dec->state.qis[0];
+    for(fragii=0;fragii<ncoded_fragis;fragii++){
+      dc_qis[coded_fragis[fragii]]=qi0;
+    }
+  }
+  /*pp_level 1: Stop after updating DC quantization indices.*/
+  if(_dec->pp_level<=OC_PP_LEVEL_TRACKDCQI){
+    if(_dec->variances!=NULL){
+      _ogg_free(_dec->variances);
+      _dec->variances=NULL;
+      _ogg_free(_dec->pp_frame_data);
+      _dec->pp_frame_data=NULL;
+    }
+    return 1;
+  }
+  if(_dec->variances==NULL){
+    size_t frame_sz;
+    size_t c_sz;
+    int    c_w;
+    int    c_h;
+    frame_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
+    c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
+    c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
+    c_sz=c_w*(size_t)c_h;
+    /*Allocate space for the chroma planes, even if we're not going to use
+       them; this simplifies allocation state management, though it may waste
+       memory on the few systems that don't overcommit pages.*/
+    frame_sz+=c_sz<<1;
+    _dec->pp_frame_data=(unsigned char *)_ogg_malloc(
+     frame_sz*sizeof(_dec->pp_frame_data[0]));
+    _dec->variances=(int *)_ogg_malloc(
+     _dec->state.nfrags*sizeof(_dec->variances[0]));
+    if(_dec->variances==NULL||_dec->pp_frame_data==NULL){/*If either allocation failed, release both.*/
+      _ogg_free(_dec->pp_frame_data);
+      _dec->pp_frame_data=NULL;
+      _ogg_free(_dec->variances);
+      _dec->variances=NULL;
+      return 1;
+    }
+    /*Force an update of the PP buffer pointers.*/
+    _dec->pp_frame_state=0;
+  }
+  /*Update the PP buffer pointers if necessary.
+    pp_frame_state is 1 when only the luma plane is set up and 2 when all
+     three planes are.*/
+  if(_dec->pp_frame_state!=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC)){
+    if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
+      /*If chroma processing is disabled, just use the PP luma plane.
+        The stride is negative, so data points at the last row of the
+         buffer.*/
+      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
+      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
+      _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width;
+      _dec->pp_frame_buf[0].data=_dec->pp_frame_data+
+       (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride;
+    }
+    else{
+      size_t y_sz;
+      size_t c_sz;
+      int    c_w;
+      int    c_h;
+      /*Otherwise, set up pointers to all three PP planes.*/
+      y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
+      c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
+      c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
+      c_sz=c_w*(size_t)c_h;
+      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
+      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
+      _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width;
+      _dec->pp_frame_buf[0].data=_dec->pp_frame_data;
+      _dec->pp_frame_buf[1].width=c_w;
+      _dec->pp_frame_buf[1].height=c_h;
+      _dec->pp_frame_buf[1].stride=_dec->pp_frame_buf[1].width;
+      _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+y_sz;
+      _dec->pp_frame_buf[2].width=c_w;
+      _dec->pp_frame_buf[2].height=c_h;
+      _dec->pp_frame_buf[2].stride=_dec->pp_frame_buf[2].width;
+      _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+c_sz;
+      oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf);
+    }
+    _dec->pp_frame_state=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC);
+  }
+  /*If we're not processing chroma, copy the reference frame's chroma planes.*/
+  if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
+    memcpy(_dec->pp_frame_buf+1,
+     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1,
+     sizeof(_dec->pp_frame_buf[1])*2);
+  }
+  return 0;
+}
+
+
+/*Per-frame working state for the decoding pipeline.
+  oc_dec_pipeline_init() fills in most of the fields; the per-MCU fragment
+   counts are written by oc_dec_dc_unpredict_mcu_plane().*/
+typedef struct{
+  int                 bounding_values[256];/*Filled by oc_state_loop_filter_init().*/
+  ptrdiff_t           ti[3][64];/*Copied from _dec->ti0 at init.*/
+  ptrdiff_t           eob_runs[3][64];/*Copied from _dec->eob_runs at init.*/
+  const ptrdiff_t    *coded_fragis[3];/*Per-plane pointer into the coded fragment list.*/
+  const ptrdiff_t    *uncoded_fragis[3];/*Per-plane pointer into the uncoded fragment list.*/
+  ptrdiff_t           ncoded_fragis[3];/*Coded fragments in the current MCU, per plane.*/
+  ptrdiff_t           nuncoded_fragis[3];/*Uncoded fragments in the current MCU, per plane.*/
+  const ogg_uint16_t *dequant[3][3][2];/*Indexed as [pli][qii][qti].*/
+  int                 fragy0[3];/*First fragment row of the current MCU, per plane.*/
+  int                 fragy_end[3];/*One past the last fragment row of the MCU, per plane.*/
+  int                 pred_last[3][3];/*Last DC predictor, per plane and reference frame.*/
+  int                 mcu_nvfrags;/*Set to 4<<!(pixel_fmt&2), i.e. 4 or 8.*/
+  int                 loop_filter;/*Non-zero if oc_state_loop_filter_init() returned 0.*/
+  int                 pp_level;/*Post-processing level saved at init.*/
+}oc_dec_pipeline_state;
+
+
+
+/*Prepares the per-frame pipeline state before any MCU is decoded.
+  _dec:  The decoder context to read the frame's state from.
+  _pipe: The pipeline state to fill in.*/
+static void oc_dec_pipeline_init(oc_dec_ctx *_dec,
+ oc_dec_pipeline_state *_pipe){
+  const ptrdiff_t *coded;
+  const ptrdiff_t *uncoded;
+  int              plane;
+  /*With vertically sub-sampled chroma, two super block rows of Y' have to be
+     decoded for every super block row of Cb and Cr.*/
+  _pipe->mcu_nvfrags=4<<!(_dec->state.info.pixel_fmt&2);
+  /*Start from the token indices and EOB run counts recorded for each plane
+     and coefficient while the tokens were unpacked.*/
+  memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti));
+  memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
+  /*Give each plane a pointer into the coded and uncoded fragment lists.
+    The uncoded list is stored backwards from the end of the buffer, so its
+     pointer moves down by the number of uncoded fragments in each plane.*/
+  coded=_dec->state.coded_fragis;
+  uncoded=coded+_dec->state.nfrags;
+  for(plane=0;plane<3;plane++){
+    ptrdiff_t n;
+    n=_dec->state.ncoded_fragis[plane];
+    _pipe->coded_fragis[plane]=coded;
+    _pipe->uncoded_fragis[plane]=uncoded;
+    coded+=n;
+    uncoded+=n-_dec->state.fplanes[plane].nfrags;
+  }
+  /*Gather just the quantizer tables selected by this frame's qi values.*/
+  for(plane=0;plane<3;plane++){
+    int qii;
+    for(qii=0;qii<_dec->state.nqis;qii++){
+      _pipe->dequant[plane][qii][0]=
+       _dec->state.dequant_tables[_dec->state.qis[qii]][plane][0];
+      _pipe->dequant[plane][qii][1]=
+       _dec->state.dequant_tables[_dec->state.qis[qii]][plane][1];
+    }
+  }
+  /*The DC predictors start at 0 for every color plane and frame type.*/
+  memset(_pipe->pred_last,0,sizeof(_pipe->pred_last));
+  /*Fill in the loop filter's bounding value array.*/
+  _pipe->loop_filter=!oc_state_loop_filter_init(&_dec->state,
+   _pipe->bounding_values);
+  /*Set up any buffers needed for post-processing.
+    If there is not enough information to post-process, it is disabled
+     regardless of the user-requested level, and the current reference frame's
+     buffers are used as the post-processing buffers.*/
+  if(oc_dec_postprocess_init(_dec)){
+    _pipe->pp_level=OC_PP_LEVEL_DISABLED;
+    memcpy(_dec->pp_frame_buf,
+     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
+     sizeof(_dec->pp_frame_buf[0])*3);
+  }
+  /*Otherwise the level is saved, to guard against the user changing it from a
+     callback.*/
+  else _pipe->pp_level=_dec->pp_level;
+}
+
+/*Undo the DC prediction in a single plane of an MCU (one or two super block
+   rows).
+  As a side effect, the number of coded and uncoded fragments in this plane of
+   the MCU is also computed.
+  _dec:  The decoder context; the dc field of each coded fragment in the MCU
+          is replaced by its value plus the predictor.
+  _pipe: The pipeline state; supplies fragy0, fragy_end and pred_last for the
+          plane, and receives ncoded_fragis and nuncoded_fragis.
+  _pli:  The color plane to process.*/
+static void oc_dec_dc_unpredict_mcu_plane(oc_dec_ctx *_dec,
+ oc_dec_pipeline_state *_pipe,int _pli){
+  const oc_fragment_plane *fplane;
+  oc_fragment             *frags;
+  int                     *pred_last;
+  ptrdiff_t                ncoded_fragis;
+  ptrdiff_t                fragi;
+  int                      fragx;
+  int                      fragy;
+  int                      fragy0;
+  int                      fragy_end;
+  int                      nhfrags;
+  /*Compute the first and last fragment row of the current MCU for this
+     plane.*/
+  fplane=_dec->state.fplanes+_pli;
+  fragy0=_pipe->fragy0[_pli];
+  fragy_end=_pipe->fragy_end[_pli];
+  nhfrags=fplane->nhfrags;
+  pred_last=_pipe->pred_last[_pli];
+  frags=_dec->state.frags;
+  ncoded_fragis=0;
+  fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags;
+  for(fragy=fragy0;fragy<fragy_end;fragy++){
+    if(fragy==0){
+      /*For the first row, all of the cases reduce to just using the previous
+         predictor for the same reference frame.*/
+      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
+        if(frags[fragi].coded){
+          int ref;
+          ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode);
+          pred_last[ref]=frags[fragi].dc+=pred_last[ref];
+          ncoded_fragis++;
+        }
+      }
+    }
+    else{
+      oc_fragment *u_frags;
+      int          l_ref;
+      int          ul_ref;
+      int          u_ref;
+      u_frags=frags-nhfrags;/*u_frags[fragi] is the fragment directly above frags[fragi].*/
+      l_ref=-1;/*-1 marks a neighbor that is missing or not coded.*/
+      ul_ref=-1;
+      u_ref=u_frags[fragi].coded?OC_FRAME_FOR_MODE(u_frags[fragi].mb_mode):-1;
+      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
+        int ur_ref;
+        if(fragx+1>=nhfrags)ur_ref=-1;
+        else{
+          ur_ref=u_frags[fragi+1].coded?
+           OC_FRAME_FOR_MODE(u_frags[fragi+1].mb_mode):-1;
+        }
+        if(frags[fragi].coded){
+          int pred;
+          int ref;
+          ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode);
+          /*We break out a separate case based on which of our neighbors use
+             the same reference frames.
+            This is somewhat faster than trying to make a generic case which
+             handles all of them, since it reduces lots of poorly predicted
+             jumps to one switch statement, and also lets a number of the
+             multiplications be optimized out by strength reduction.
+            The switch value has bit 0 set for the left neighbor, bit 1 for
+             the upper-left, bit 2 for the upper, and bit 3 for the
+             upper-right.*/
+          switch((l_ref==ref)|(ul_ref==ref)<<1|
+           (u_ref==ref)<<2|(ur_ref==ref)<<3){
+            default:pred=pred_last[ref];break;
+            case  1:
+            case  3:pred=frags[fragi-1].dc;break;
+            case  2:pred=u_frags[fragi-1].dc;break;
+            case  4:
+            case  6:
+            case 12:pred=u_frags[fragi].dc;break;
+            case  5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break;
+            case  8:pred=u_frags[fragi+1].dc;break;
+            case  9:
+            case 11:
+            case 13:{
+              pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128;
+            }break;
+            case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break;
+            case 14:{
+              pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc)
+               +10*u_frags[fragi].dc)/16;
+            }break;
+            case  7:
+            case 15:{
+              int p0;
+              int p1;
+              int p2;
+              p0=frags[fragi-1].dc;
+              p1=u_frags[fragi-1].dc;
+              p2=u_frags[fragi].dc;
+              pred=(29*(p0+p2)-26*p1)/32;
+              if(abs(pred-p2)>128)pred=p2;/*Fall back to a neighbor if the prediction strays more than 128 from it.*/
+              else if(abs(pred-p0)>128)pred=p0;
+              else if(abs(pred-p1)>128)pred=p1;
+            }break;
+          }
+          pred_last[ref]=frags[fragi].dc+=pred;
+          ncoded_fragis++;
+          l_ref=ref;
+        }
+        else l_ref=-1;
+        ul_ref=u_ref;/*Slide the neighbor window one fragment to the right.*/
+        u_ref=ur_ref;
+      }
+    }
+  }
+  _pipe->ncoded_fragis[_pli]=ncoded_fragis;
+  /*Also save the number of uncoded fragments so we know how many to copy.*/
+  _pipe->nuncoded_fragis[_pli]=
+   (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis;
+}
+
+/*Reconstructs all coded fragments in a single MCU (one or two super block
+   rows).
+  This requires that each coded fragment have a proper macro block mode and
+   motion vector (if not in INTRA mode), and have its DC value decoded, with
+   the DC prediction process reversed, and the number of coded and uncoded
+   fragments in this plane of the MCU be counted.
+  The token lists for each color plane and coefficient should also be filled
+   in, along with initial token offsets, extra bits offsets, and EOB run
+   counts.
+  _dec:  The decoder context.
+  _pipe: The pipeline state for the current frame.
+         This plane's coded and uncoded fragment list pointers, token
+          offsets, and EOB run counts are updated as fragments are consumed.
+  _pli:  The index of the color plane to reconstruct.*/
+static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
+ oc_dec_pipeline_state *_pipe,int _pli){
+  unsigned char       *dct_tokens;
+  const unsigned char *dct_fzig_zag;
+  ogg_uint16_t         dc_quant[2];
+  const oc_fragment   *frags;
+  const ptrdiff_t     *coded_fragis;
+  ptrdiff_t            ncoded_fragis;
+  ptrdiff_t            fragii;
+  ptrdiff_t           *ti;
+  ptrdiff_t           *eob_runs;
+  int                  qti;
+  dct_tokens=_dec->dct_tokens;
+  dct_fzig_zag=_dec->state.opt_data.dct_fzig_zag;
+  frags=_dec->state.frags;
+  coded_fragis=_pipe->coded_fragis[_pli];
+  ncoded_fragis=_pipe->ncoded_fragis[_pli];
+  ti=_pipe->ti[_pli];
+  eob_runs=_pipe->eob_runs[_pli];
+  for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0];/*The DC
+   quantizer for each quantizer type (0 for INTRA, 1 otherwise), always taken
+   from the first qi index.*/
+  for(fragii=0;fragii<ncoded_fragis;fragii++){
+    /*This array is made one element larger because the zig-zag index array
+       uses the final element as a dumping ground for out-of-range indices
+       to protect us from buffer overflow.*/
+    OC_ALIGN8(ogg_int16_t dct_coeffs[65]);
+    const ogg_uint16_t *ac_quant;
+    ptrdiff_t           fragi;
+    int                 last_zzi;
+    int                 zzi;
+    fragi=coded_fragis[fragii];
+    for(zzi=0;zzi<64;zzi++)dct_coeffs[zzi]=0;
+    qti=frags[fragi].mb_mode!=OC_MODE_INTRA;
+    ac_quant=_pipe->dequant[_pli][frags[fragi].qii][qti];
+    /*Decode the AC coefficients.*/
+    for(zzi=0;zzi<64;){
+      int token;
+      last_zzi=zzi;
+      if(eob_runs[zzi]){/*An EOB run is pending at this index: consume one
+         block from it and stop decoding this fragment.*/
+        eob_runs[zzi]--;
+        break;
+      }
+      else{
+        ptrdiff_t eob;
+        int       cw;
+        int       rlen;
+        int       coeff;
+        int       lti;
+        lti=ti[zzi];
+        token=dct_tokens[lti++];
+        cw=OC_DCT_CODE_WORD[token];
+        /*These parts could be done branchless, but the branches are fairly
+           predictable and the C code translates into more than a few
+           instructions, so it's worth it to avoid them.*/
+        if(OC_DCT_TOKEN_NEEDS_MORE(token)){
+          cw+=dct_tokens[lti++]<<OC_DCT_TOKEN_EB_POS(token);
+        }
+        eob=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
+        if(token==OC_DCT_TOKEN_FAT_EOB){
+          eob+=dct_tokens[lti++]<<8;
+          if(eob==0)eob=OC_DCT_EOB_FINISH;
+        }
+        rlen=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
+        cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
+        coeff=cw>>OC_DCT_CW_MAG_SHIFT;
+        eob_runs[zzi]=eob;
+        ti[zzi]=lti;/*Save the updated token offset for this index.*/
+        zzi+=rlen;
+        dct_coeffs[dct_fzig_zag[zzi]]=(ogg_int16_t)(coeff*(int)ac_quant[zzi]);
+        zzi+=!eob;/*Only step past this coefficient if the token was not an
+         EOB; otherwise the next iteration consumes the EOB run.*/
+      }
+    }
+    /*TODO: zzi should be exactly 64 here.
+      If it's not, we should report some kind of warning.*/
+    zzi=OC_MINI(zzi,64);
+    dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc;
+    /*last_zzi is always initialized.
+      If your compiler thinks otherwise, it is dumb.*/
+    oc_state_frag_recon(&_dec->state,fragi,_pli,
+     dct_coeffs,last_zzi,dc_quant[qti]);
+  }
+  _pipe->coded_fragis[_pli]+=ncoded_fragis;
+  /*Right now the reconstructed MCU has only the coded blocks in it.*/
+  /*TODO: We make the decision here to always copy the uncoded blocks into it
+     from the reference frame.
+    We could also copy the coded blocks back over the reference frame, if we
+     wait for an additional MCU to be decoded, which might be faster if only a
+     small number of blocks are coded.
+    However, this introduces more latency, creating a larger cache footprint.
+    It's unknown which decision is better, but this one results in simpler
+     code, and the hard case (high bitrate, high resolution) is handled
+     correctly.*/
+  /*Copy the uncoded blocks from the previous reference frame.
+    The uncoded list pointer is moved backwards by the number of uncoded
+     fragments before the copy.*/
+  _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
+  oc_state_frag_copy_list(&_dec->state,_pipe->uncoded_fragis[_pli],
+   _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli);
+}
+
+/*Filter a horizontal block edge.
+  Each of the 8 columns is handled independently: ten vertically adjacent
+   source pixels straddling the edge are read, and the eight interior ones are
+   written to the destination, either smoothed or copied unchanged.
+  _dst:         The first of the eight destination rows.
+  _dst_ystride: The offset between destination rows.
+  _src:         The first of the ten source rows.
+  _src_ystride: The offset between source rows.
+  _qstep:       The absolute step across the edge must be below this for a
+                 column to be smoothed.
+  _flimit:      The summed activity on each side of the edge must be below
+                 this for a column to be smoothed.
+  _variance0:   Accumulates the activity (capped at 255 per column) among the
+                 first five source pixels.
+  _variance1:   Accumulates the activity (capped at 255 per column) among the
+                 last five source pixels.*/
+static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride,
+ const unsigned char *_src,int _src_ystride,int _qstep,int _flimit,
+ int *_variance0,int *_variance1){
+  int col;
+  for(col=0;col<8;col++){
+    const unsigned char *in;
+    unsigned char       *out;
+    int                  p[10];
+    int                  act0;
+    int                  act1;
+    int                  step;
+    int                  i;
+    /*Gather the column of ten pixels before anything is written.*/
+    in=_src+col;
+    for(i=0;i<10;i++){
+      p[i]=*in;
+      in+=_src_ystride;
+    }
+    /*Measure the activity on either side of the edge.*/
+    act0=0;
+    act1=0;
+    for(i=1;i<5;i++){
+      act0+=abs(p[i]-p[i-1]);
+      act1+=abs(p[i+4]-p[i+5]);
+    }
+    *_variance0+=OC_MINI(255,act0);
+    *_variance1+=OC_MINI(255,act1);
+    step=p[5]-p[4];
+    out=_dst+col;
+    if(act0>=_flimit||act1>=_flimit||step>=_qstep||-step>=_qstep){
+      /*Too much detail or too large a step: pass the pixels through.*/
+      for(i=1;i<9;i++){
+        *out=(unsigned char)p[i];
+        out+=_dst_ystride;
+      }
+      continue;
+    }
+    /*Otherwise apply the low-pass filter, replicating the end pixels.*/
+    *out=(unsigned char)(3*p[0]+2*p[1]+p[2]+p[3]+p[4]+4>>3);
+    out+=_dst_ystride;
+    *out=(unsigned char)(2*p[0]+p[1]+2*p[2]+p[3]+p[4]+p[5]+4>>3);
+    out+=_dst_ystride;
+    for(i=0;i<4;i++){
+      *out=(unsigned char)(p[i]+p[i+1]+p[i+2]+2*p[i+3]+
+       p[i+4]+p[i+5]+p[i+6]+4>>3);
+      out+=_dst_ystride;
+    }
+    *out=(unsigned char)(p[4]+p[5]+p[6]+2*p[7]+p[8]+2*p[9]+4>>3);
+    out+=_dst_ystride;
+    *out=(unsigned char)(p[5]+p[6]+p[7]+2*p[8]+3*p[9]+4>>3);
+  }
+}
+
+/*Filter a vertical block edge.
+  Works in place on 8 rows: for each row, the ten pixels starting one to the
+   left of _dst are read, and the eight pixels starting at _dst are replaced
+   by smoothed values when the row passes the activity and step tests.
+  Rows that fail the tests are left untouched.
+  _dst:         The first pixel to be (possibly) rewritten in the first row.
+  _dst_ystride: The offset between rows.
+  _qstep:       The absolute step across the edge must be below this for a
+                 row to be smoothed.
+  _flimit:      The summed activity on each side of the edge must be below
+                 this for a row to be smoothed.
+  _variances:   _variances[0] and _variances[1] accumulate the activity
+                 (capped at 255 per row) among the first five and the last
+                 five pixels read, respectively.*/
+static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride,
+ int _qstep,int _flimit,int *_variances){
+  unsigned char *row;
+  int            y;
+  row=_dst;
+  for(y=0;y<8;y++,row+=_dst_ystride){
+    int p[10];
+    int act0;
+    int act1;
+    int step;
+    int i;
+    /*Snapshot the row before modifying it.*/
+    for(i=0;i<10;i++)p[i]=row[i-1];
+    act0=0;
+    act1=0;
+    for(i=1;i<5;i++){
+      act0+=abs(p[i]-p[i-1]);
+      act1+=abs(p[i+4]-p[i+5]);
+    }
+    _variances[0]+=OC_MINI(255,act0);
+    _variances[1]+=OC_MINI(255,act1);
+    step=p[5]-p[4];
+    if(act0>=_flimit||act1>=_flimit||step>=_qstep||-step>=_qstep)continue;
+    row[0]=(unsigned char)(3*p[0]+2*p[1]+p[2]+p[3]+p[4]+4>>3);
+    row[1]=(unsigned char)(2*p[0]+p[1]+2*p[2]+p[3]+p[4]+p[5]+4>>3);
+    for(i=0;i<4;i++){
+      row[i+2]=(unsigned char)(p[i]+p[i+1]+p[i+2]+2*p[i+3]+
+       p[i+4]+p[i+5]+p[i+6]+4>>3);
+    }
+    row[6]=(unsigned char)(p[4]+p[5]+p[6]+2*p[7]+p[8]+2*p[9]+4>>3);
+    row[7]=(unsigned char)(p[5]+p[6]+p[7]+2*p[8]+3*p[9]+4>>3);
+  }
+}
+
+static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec,
+ th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0,
+ int _fragy_end){/*Runs the post-processing de-blocking filter over a range of
+   fragment rows in one plane.
+  Pixels are read from _src[_pli] and the result is written to _dst[_pli];
+   rows at the very top and bottom of the plane are copied unfiltered.
+  The edge activity measured by the filters is accumulated, per fragment, in
+   _dec->variances.
+  _dec:       The decoder context.
+  _dst:       The image planes to write to.
+  _src:       The image planes to read from.
+  _pli:       The index of the plane to process.
+  _fragy0:    The first fragment row of the range.
+  _fragy_end: The end (exclusive) fragment row of the range.*/
+  oc_fragment_plane   *fplane;
+  int                 *variance;
+  unsigned char       *dc_qi;
+  unsigned char       *dst;
+  const unsigned char *src;
+  ptrdiff_t            froffset;
+  int                  dst_ystride;
+  int                  src_ystride;
+  int                  nhfrags;
+  int                  width;
+  int                  notstart;
+  int                  notdone;
+  int                  flimit;
+  int                  qstep;
+  int                  y_end;
+  int                  y;
+  int                  x;
+  _dst+=_pli;
+  _src+=_pli;
+  fplane=_dec->state.fplanes+_pli;
+  nhfrags=fplane->nhfrags;
+  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
+  variance=_dec->variances+froffset;
+  dc_qi=_dec->dc_qis+froffset;
+  notstart=_fragy0>0;/*1 unless the range begins at the top of the plane.*/
+  notdone=_fragy_end<fplane->nvfrags;/*1 unless the range reaches the bottom.*/
+  /*We want to clear an extra row of variances, except at the end.*/
+  memset(variance+(nhfrags&-notstart),0,
+   (_fragy_end+notdone-_fragy0-notstart)*(nhfrags*sizeof(variance[0])));
+  /*Except for the first time, we want to point to the middle of the row.*/
+  y=(_fragy0<<3)+(notstart<<2);
+  dst_ystride=_dst->stride;
+  src_ystride=_src->stride;
+  dst=_dst->data+y*(ptrdiff_t)dst_ystride;
+  src=_src->data+y*(ptrdiff_t)src_ystride;
+  width=_dst->width;
+  for(;y<4;y++){/*Only runs at the top of the plane: the first four pixel rows
+     are copied without filtering.*/
+    memcpy(dst,src,width*sizeof(dst[0]));
+    dst+=dst_ystride;
+    src+=src_ystride;
+  }
+  /*We also want to skip the last row in the frame for this loop.*/
+  y_end=_fragy_end-!notdone<<3;
+  for(;y<y_end;y+=8){
+    qstep=_dec->pp_dc_scale[*dc_qi];
+    flimit=(qstep*3)>>2;/*The activity limit is 3/4 of the step limit.*/
+    oc_filter_hedge(dst,dst_ystride,src-src_ystride,src_ystride,
+     qstep,flimit,variance,variance+nhfrags);
+    variance++;
+    dc_qi++;
+    for(x=8;x<width;x+=8){
+      qstep=_dec->pp_dc_scale[*dc_qi];
+      flimit=(qstep*3)>>2;
+      oc_filter_hedge(dst+x,dst_ystride,src+x-src_ystride,src_ystride,
+       qstep,flimit,variance,variance+nhfrags);
+      oc_filter_vedge(dst+x-(dst_ystride<<2)-4,dst_ystride,
+       qstep,flimit,variance-1);/*The vertical edge is filtered in place in
+       the destination, four pixel rows above and four columns to the left of
+       the current position.*/
+      variance++;
+      dc_qi++;
+    }
+    dst+=dst_ystride<<3;
+    src+=src_ystride<<3;
+  }
+  /*And finally, handle the last row in the frame, if it's in the range.*/
+  if(!notdone){
+    int height;
+    height=_dst->height;
+    for(;y<height;y++){
+      memcpy(dst,src,width*sizeof(dst[0]));
+      dst+=dst_ystride;
+      src+=src_ystride;
+    }
+    /*Filter the last row of vertical block edges.*/
+    dc_qi++;
+    for(x=8;x<width;x+=8){
+      qstep=_dec->pp_dc_scale[*dc_qi++];
+      flimit=(qstep*3)>>2;
+      oc_filter_vedge(dst+x-(dst_ystride<<3)-4,dst_ystride,
+       qstep,flimit,variance++);
+    }
+  }
+}
+
+static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
+ int _dc_scale,int _sharp_mod,int _strong){/*De-rings one 8x8 block in place.
+  Each pixel is replaced by a weighted average of itself and its four
+   neighbors; a neighbor's weight shrinks as the absolute difference across
+   the corresponding pixel boundary grows.
+  _idata:     The top-left pixel of the block.
+  _ystride:   The offset between rows.
+  _b:         Edge flags; a set bit means there is no neighbor on that side
+               and the block's own edge pixels are used instead:
+               1 for left, 2 for right, 4 for top, 8 for bottom.
+  _dc_scale:  Added to every weight before clamping.
+  _sharp_mod: The weight used in place of any that would fall below -64.
+  _strong:    Selects the entry of OC_MOD_MAX and OC_MOD_SHIFT to use
+               (0 or 1).*/
+  static const unsigned char OC_MOD_MAX[2]={24,32};
+  static const unsigned char OC_MOD_SHIFT[2]={1,0};
+  const unsigned char *psrc;
+  const unsigned char *src;
+  const unsigned char *nsrc;
+  unsigned char       *dst;
+  int                  vmod[72];/*9 rows of 8 weights between vertical
+   neighbors, indexed by (row<<3)+column.*/
+  int                  hmod[72];/*9 columns of 8 weights between horizontal
+   neighbors, indexed by (column<<3)+row.*/
+  int                  mod_hi;
+  int                  by;
+  int                  bx;
+  mod_hi=OC_MINI(3*_dc_scale,OC_MOD_MAX[_strong]);
+  dst=_idata;
+  src=dst;
+  psrc=src-(_ystride&-!(_b&4));/*The row above, or the first row itself on the
+   top edge.*/
+  for(by=0;by<9;by++){
+    for(bx=0;bx<8;bx++){
+      int mod;
+      mod=32+_dc_scale-(abs(src[bx]-psrc[bx])<<OC_MOD_SHIFT[_strong]);
+      vmod[(by<<3)+bx]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
+    }
+    psrc=src;
+    src+=_ystride&-(!(_b&8)|by<7);/*Do not step below the last row of the block
+     on the bottom edge.*/
+  }
+  nsrc=dst;
+  psrc=dst-!(_b&1);/*The column to the left, or the first column itself on the
+   left edge.*/
+  for(bx=0;bx<9;bx++){
+    src=nsrc;
+    for(by=0;by<8;by++){
+      int mod;
+      mod=32+_dc_scale-(abs(*src-*psrc)<<OC_MOD_SHIFT[_strong]);
+      hmod[(bx<<3)+by]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
+      psrc+=_ystride;
+      src+=_ystride;
+    }
+    psrc=nsrc;
+    nsrc+=!(_b&2)|bx<7;/*Do not step right of the last column of the block on
+     the right edge.*/
+  }
+  src=dst;
+  psrc=src-(_ystride&-!(_b&4));
+  nsrc=src+_ystride;
+  for(by=0;by<8;by++){/*src and dst address the same row, so the rows above and
+     the pixels to the left have already been filtered when they are read.*/
+    int a;
+    int b;
+    int w;
+    a=128;/*The weight left for the center pixel, out of 128.*/
+    b=64;/*The weighted neighbor sum, starting with the rounding offset.*/
+    w=hmod[by];
+    a-=w;
+    b+=w**(src-!(_b&1));
+    w=vmod[by<<3];
+    a-=w;
+    b+=w*psrc[0];
+    w=vmod[by+1<<3];
+    a-=w;
+    b+=w*nsrc[0];
+    w=hmod[(1<<3)+by];
+    a-=w;
+    b+=w*src[1];
+    dst[0]=OC_CLAMP255(a*src[0]+b>>7);
+    for(bx=1;bx<7;bx++){
+      a=128;
+      b=64;
+      w=hmod[(bx<<3)+by];
+      a-=w;
+      b+=w*src[bx-1];
+      w=vmod[(by<<3)+bx];
+      a-=w;
+      b+=w*psrc[bx];
+      w=vmod[(by+1<<3)+bx];
+      a-=w;
+      b+=w*nsrc[bx];
+      w=hmod[(bx+1<<3)+by];
+      a-=w;
+      b+=w*src[bx+1];
+      dst[bx]=OC_CLAMP255(a*src[bx]+b>>7);
+    }
+    a=128;
+    b=64;
+    w=hmod[(7<<3)+by];
+    a-=w;
+    b+=w*src[6];
+    w=vmod[(by<<3)+7];
+    a-=w;
+    b+=w*psrc[7];
+    w=vmod[(by+1<<3)+7];
+    a-=w;
+    b+=w*nsrc[7];
+    w=hmod[(8<<3)+by];
+    a-=w;
+    b+=w*src[7+!(_b&2)];
+    dst[7]=OC_CLAMP255(a*src[7]+b>>7);
+    dst+=_ystride;
+    psrc=src;
+    src=nsrc;
+    nsrc+=_ystride&-(!(_b&8)|by<6);
+  }
+}
+
+#define OC_DERING_THRESH1 (384)/*Variance above which a block is de-ringed once, weakly.*/
+#define OC_DERING_THRESH2 (4*OC_DERING_THRESH1)/*Variance above which a block is de-ringed once, strongly.*/
+#define OC_DERING_THRESH3 (5*OC_DERING_THRESH1)/*Luma threshold used when strong de-ringing is enabled.*/
+#define OC_DERING_THRESH4 (10*OC_DERING_THRESH1)/*Chroma threshold used when strong de-ringing is enabled; also the neighbor threshold for extra passes.*/
+
+static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
+ int _pli,int _fragy0,int _fragy_end){/*Runs the post-processing de-ringing
+   filter over fragment rows [_fragy0,_fragy_end) of one plane, in place.
+  The strength applied to each 8x8 block is chosen from the variance recorded
+   for it in _dec->variances.
+  _dec:       The decoder context.
+  _img:       The image planes to filter.
+  _pli:       The index of the plane to process.
+  _fragy0:    The first fragment row to process.
+  _fragy_end: The end (exclusive) fragment row to process.*/
+  th_img_plane      *iplane;
+  oc_fragment_plane *fplane;
+  oc_fragment       *frag;
+  int               *variance;
+  unsigned char     *idata;
+  ptrdiff_t          froffset;
+  int                ystride;
+  int                nhfrags;
+  int                sthresh;
+  int                strong;
+  int                y_end;
+  int                width;
+  int                height;
+  int                y;
+  int                x;
+  iplane=_img+_pli;
+  fplane=_dec->state.fplanes+_pli;
+  nhfrags=fplane->nhfrags;
+  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
+  variance=_dec->variances+froffset;
+  frag=_dec->state.frags+froffset;
+  strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY);
+  sthresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3;
+  y=_fragy0<<3;
+  ystride=iplane->stride;
+  idata=iplane->data+y*(ptrdiff_t)ystride;
+  y_end=_fragy_end<<3;
+  width=iplane->width;
+  height=iplane->height;
+  for(;y<y_end;y+=8){
+    for(x=0;x<width;x+=8){
+      int b;
+      int qi;
+      int var;
+      qi=_dec->state.qis[frag->qii];
+      var=*variance;
+      b=(x<=0)|(x+8>=width)<<1|(y<=0)<<2|(y+8>=height)<<3;/*Flag the sides of
+       this block that lie on the plane boundary: 1 for left, 2 for right,
+       4 for top, 8 for bottom.*/
+      if(strong&&var>sthresh){
+        oc_dering_block(idata+x,ystride,b,
+         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
+        if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4||
+         !(b&2)&&variance[1]>OC_DERING_THRESH4||
+         !(b&4)&&*(variance-nhfrags)>OC_DERING_THRESH4||
+         !(b&8)&&variance[nhfrags]>OC_DERING_THRESH4){/*Chroma blocks, and
+           luma blocks with a high-variance neighbor inside the plane, get two
+           more strong passes.*/
+          oc_dering_block(idata+x,ystride,b,
+           _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
+          oc_dering_block(idata+x,ystride,b,
+           _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
+        }
+      }
+      else if(var>OC_DERING_THRESH2){
+        oc_dering_block(idata+x,ystride,b,
+         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
+      }
+      else if(var>OC_DERING_THRESH1){
+        oc_dering_block(idata+x,ystride,b,
+         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],0);
+      }
+      frag++;
+      variance++;
+    }
+    idata+=ystride<<3;
+  }
+}
+
+
+
+/*Allocates a decoder context and initializes it from the given stream info
+   and setup data.
+  The current frame number of the new context starts at 0.
+  Return: The new context, or NULL if either argument is NULL, the allocation
+   fails, or oc_dec_init() reports an error.*/
+th_dec_ctx *th_decode_alloc(const th_info *_info,const th_setup_info *_setup){
+  oc_dec_ctx *ctx;
+  if(_info==NULL)return NULL;
+  if(_setup==NULL)return NULL;
+  ctx=_ogg_malloc(sizeof(*ctx));
+  if(ctx!=NULL&&oc_dec_init(ctx,_info,_setup)>=0){
+    ctx->state.curframe_num=0;
+    return ctx;
+  }
+  /*Allocation or initialization failed; release whatever we got.*/
+  _ogg_free(ctx);
+  return NULL;
+}
+
+/*Clears and frees a decoder context.
+  Passing NULL is allowed and does nothing.*/
+void th_decode_free(th_dec_ctx *_dec){
+  if(_dec==NULL)return;
+  oc_dec_clear(_dec);
+  _ogg_free(_dec);
+}
+
+/*Issues a control request to the decoder.
+  _dec:    The decoder context.
+  _req:    One of the TH_DECCTL_* request codes handled below.
+  _buf:    The argument of the request; its type depends on _req.
+  _buf_sz: The size, in bytes, of the object _buf points to.
+  Return: 0 on success, TH_EFAULT if _dec or _buf is NULL, TH_EINVAL if
+   _buf_sz does not match the expected type or the supplied value is out of
+   range, or TH_EIMPL if _req is not recognized.*/
+int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
+ size_t _buf_sz){
+  switch(_req){
+  case TH_DECCTL_GET_PPLEVEL_MAX:{
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    (*(int *)_buf)=OC_PP_LEVEL_MAX;
+    return 0;
+  }break;
+  case TH_DECCTL_SET_PPLEVEL:{
+    int pp_level;
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    pp_level=*(int *)_buf;
+    if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL;
+    _dec->pp_level=pp_level;
+    return 0;
+  }break;
+  case TH_DECCTL_SET_GRANPOS:{
+    ogg_int64_t granpos;
+    int         shift;
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL;
+    granpos=*(ogg_int64_t *)_buf;
+    if(granpos<0)return TH_EINVAL;
+    shift=_dec->state.info.keyframe_granule_shift;
+    _dec->state.granpos=granpos;
+    _dec->state.keyframe_num=(granpos>>shift)-_dec->state.granpos_bias;
+    /*The mask is built in 64 bits: with a shift of 31, 1<<shift would
+       overflow a signed int.*/
+    _dec->state.curframe_num=_dec->state.keyframe_num
+     +(granpos&(((ogg_int64_t)1<<shift)-1));
+    return 0;
+  }break;
+  case TH_DECCTL_SET_STRIPE_CB:{
+    th_stripe_callback *cb;
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL;
+    cb=(th_stripe_callback *)_buf;
+    _dec->stripe_cb.ctx=cb->ctx;
+    _dec->stripe_cb.stripe_decoded=cb->stripe_decoded;
+    return 0;
+  }break;
+#ifdef HAVE_CAIRO
+  /*Each telemetry request also turns telemetry on as a whole.*/
+  case TH_DECCTL_SET_TELEMETRY_MBMODE:{
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    _dec->telemetry=1;
+    _dec->telemetry_mbmode=*(int *)_buf;
+    return 0;
+  }break;
+  case TH_DECCTL_SET_TELEMETRY_MV:{
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    _dec->telemetry=1;
+    _dec->telemetry_mv=*(int *)_buf;
+    return 0;
+  }break;
+  case TH_DECCTL_SET_TELEMETRY_QI:{
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    _dec->telemetry=1;
+    _dec->telemetry_qi=*(int *)_buf;
+    return 0;
+  }break;
+  case TH_DECCTL_SET_TELEMETRY_BITS:{
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    _dec->telemetry=1;
+    _dec->telemetry_bits=*(int *)_buf;
+    return 0;
+  }break;
+#endif
+  default:return TH_EIMPL;
+  }
+}
+
+/*Called when a frame must be produced before any reference buffer has been
+   initialized (i.e., decoding did not start on a key frame).
+  Points the golden and previous references at buffer 0 and the current frame
+   at buffer 1, then fills buffer 0 (the padded luma plane followed by both
+   padded chroma planes) with solid gray, 0x80.*/
+static void oc_dec_init_dummy_frame(th_dec_ctx *_dec){
+  th_info *info;
+  size_t   luma_sz;
+  size_t   chroma_sz;
+  int      luma_stride;
+  int      luma_rows;
+  int      chroma_stride;
+  int      chroma_rows;
+  int      hdec;
+  int      vdec;
+  _dec->state.ref_frame_idx[OC_FRAME_SELF]=1;
+  _dec->state.ref_frame_idx[OC_FRAME_PREV]=0;
+  _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0;
+  info=&_dec->state.info;
+  /*Chroma is halved in a direction when the matching pixel_fmt bit is clear.*/
+  hdec=!(info->pixel_fmt&1);
+  vdec=!(info->pixel_fmt&2);
+  luma_stride=info->frame_width+2*OC_UMV_PADDING;
+  luma_rows=info->frame_height+2*OC_UMV_PADDING;
+  chroma_stride=luma_stride>>hdec;
+  chroma_rows=luma_rows>>vdec;
+  luma_sz=luma_stride*(size_t)luma_rows;
+  chroma_sz=chroma_stride*(size_t)chroma_rows;
+  memset(_dec->state.ref_frame_data[0],0x80,luma_sz+2*chroma_sz);
+}
+
+int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
+ ogg_int64_t *_granpos){/*Decodes one packet of video data.
+  _dec:     The decoder context.
+  _op:      The packet to decode.
+  _granpos: If not NULL, receives the granule position of the frame.
+  Return: 0 if a frame was decoded, TH_DUPFRAME if the packet was empty (a
+   dropped frame), TH_EFAULT if _dec or _op is NULL, or the negative value
+   returned by oc_dec_frame_header_unpack() on failure.*/
+  int ret;
+  if(_dec==NULL||_op==NULL)return TH_EFAULT;
+  /*A completely empty packet indicates a dropped frame and is treated exactly
+     like an inter frame with no coded blocks.
+    Only proceed if we have a non-empty packet.*/
+  if(_op->bytes!=0){
+    oc_dec_pipeline_state pipe;
+    th_ycbcr_buffer       stripe_buf;
+    int                   stripe_fragy;
+    int                   refi;
+    int                   pli;
+    int                   notstart;
+    int                   notdone;
+    oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes);
+#if defined(HAVE_CAIRO)
+    _dec->telemetry_frame_bytes=_op->bytes;
+#endif
+    ret=oc_dec_frame_header_unpack(_dec);
+    if(ret<0)return ret;
+    /*Select a free buffer to use for the reconstructed version of this
+       frame.*/
+    if(_dec->state.frame_type!=OC_INTRA_FRAME&&
+     (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
+     _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){
+      /*No reference frames yet!*/
+      oc_dec_init_dummy_frame(_dec);
+      refi=_dec->state.ref_frame_idx[OC_FRAME_SELF];
+    }
+    else{
+      for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
+       refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++);/*Pick the lowest
+       buffer index that is neither the golden nor the previous reference.*/
+      _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
+    }
+    if(_dec->state.frame_type==OC_INTRA_FRAME){
+      oc_dec_mark_all_intra(_dec);
+      _dec->state.keyframe_num=_dec->state.curframe_num;
+#if defined(HAVE_CAIRO)
+      _dec->telemetry_coding_bytes=
+       _dec->telemetry_mode_bytes=
+       _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+    }
+    else{
+      oc_dec_coded_flags_unpack(_dec);
+#if defined(HAVE_CAIRO)
+      _dec->telemetry_coding_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+      oc_dec_mb_modes_unpack(_dec);
+#if defined(HAVE_CAIRO)
+      _dec->telemetry_mode_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+      oc_dec_mv_unpack_and_frag_modes_fill(_dec);
+#if defined(HAVE_CAIRO)
+      _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+    }
+    oc_dec_block_qis_unpack(_dec);
+#if defined(HAVE_CAIRO)
+    _dec->telemetry_qi_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+    oc_dec_residual_tokens_unpack(_dec);
+    /*Update granule position.
+      This must be done before the striped decode callbacks so that the
+       application knows what to do with the frame data.*/
+    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
+     _dec->state.info.keyframe_granule_shift)
+     +(_dec->state.curframe_num-_dec->state.keyframe_num);
+    _dec->state.curframe_num++;
+    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
+    /*All of the rest of the operations -- DC prediction reversal,
+       reconstructing coded fragments, copying uncoded fragments, loop
+       filtering, extending borders, and out-of-loop post-processing -- should
+       be pipelined.
+      I.e., DC prediction reversal, reconstruction, and uncoded fragment
+       copying are done for one or two super block rows, then loop filtering is
+       run as far as it can, then bordering copying, then post-processing.
+      For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super
+       block rows, and one chroma.
+      Otherwise, an MCU consists of one super block row from each plane.
+      Inside each MCU, we perform all of the steps on one color plane before
+       moving on to the next.
+      After reconstruction, the additional filtering stages introduce a delay
+       since they need some pixels from the next fragment row.
+      Thus the actual number of decoded rows available is slightly smaller for
+       the first MCU, and slightly larger for the last.
+
+      This entire process allows us to operate on the data while it is still in
+       cache, resulting in big performance improvements.
+      An application callback allows further application processing (blitting
+       to video memory, color conversion, etc.) to also use the data while it's
+       in cache.*/
+    oc_dec_pipeline_init(_dec,&pipe);
+    oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
+    notstart=0;
+    notdone=1;
+    for(stripe_fragy=0;notdone;stripe_fragy+=pipe.mcu_nvfrags){
+      int avail_fragy0;
+      int avail_fragy_end;
+      avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
+      notdone=stripe_fragy+pipe.mcu_nvfrags<avail_fragy_end;
+      for(pli=0;pli<3;pli++){
+        oc_fragment_plane *fplane;
+        int                frag_shift;
+        int                pp_offset;
+        int                sdelay;
+        int                edelay;
+        fplane=_dec->state.fplanes+pli;
+        /*Compute the first and last fragment row of the current MCU for this
+           plane.*/
+        frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
+        pipe.fragy0[pli]=stripe_fragy>>frag_shift;
+        pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
+         pipe.fragy0[pli]+(pipe.mcu_nvfrags>>frag_shift));
+        oc_dec_dc_unpredict_mcu_plane(_dec,&pipe,pli);
+        oc_dec_frags_recon_mcu_plane(_dec,&pipe,pli);
+        sdelay=edelay=0;/*The number of fragment rows each later stage lags
+         behind at the start and end of the range; each enabled stage adds one
+         row, except at the first and last MCU respectively.*/
+        if(pipe.loop_filter){
+          sdelay+=notstart;
+          edelay+=notdone;
+          oc_state_loop_filter_frag_rows(&_dec->state,pipe.bounding_values,
+           refi,pli,pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
+        }
+        /*To fill the borders, we have an additional two pixel delay, since a
+           fragment in the next row could filter its top edge, using two pixels
+           from a fragment in this row.
+          But there's no reason to delay a full fragment between the two.*/
+        oc_state_borders_fill_rows(&_dec->state,refi,pli,
+         (pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
+         (pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
+        /*Out-of-loop post-processing.
+          The chroma planes compare against levels three above those used for
+           luma.*/
+        pp_offset=3*(pli!=0);
+        if(pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
+          /*Perform de-blocking in one plane.*/
+          sdelay+=notstart;
+          edelay+=notdone;
+          oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
+           _dec->state.ref_frame_bufs[refi],pli,
+           pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
+          if(pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
+            /*Perform de-ringing in one plane.*/
+            sdelay+=notstart;
+            edelay+=notdone;
+            oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
+             pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
+          }
+        }
+        /*If no post-processing is done, we still need to delay a row for the
+           loop filter, thanks to the strange filtering order VP3 chose.*/
+        else if(pipe.loop_filter){
+          sdelay+=notstart;
+          edelay+=notdone;
+        }
+        /*Compute the intersection of the available rows in all planes.
+          If chroma is sub-sampled, the effect of each of its delays is
+           doubled, but luma might have more post-processing filters enabled
+           than chroma, so we don't know up front which one is the limiting
+           factor.*/
+        avail_fragy0=OC_MINI(avail_fragy0,pipe.fragy0[pli]-sdelay<<frag_shift);
+        avail_fragy_end=OC_MINI(avail_fragy_end,
+         pipe.fragy_end[pli]-edelay<<frag_shift);
+      }
+      if(_dec->stripe_cb.stripe_decoded!=NULL){
+        /*The callback might want to use the FPU, so let's make sure they can.
+          We violate all kinds of ABI restrictions by not doing this until
+           now, but none of them actually matter since we don't use floating
+           point ourselves.*/
+        oc_restore_fpu(&_dec->state);
+        /*Make the callback, ensuring we flip the sense of the "start" and
+           "end" of the available region upside down.*/
+        (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
+         _dec->state.fplanes[0].nvfrags-avail_fragy_end,
+         _dec->state.fplanes[0].nvfrags-avail_fragy0);
+      }
+      notstart=1;
+    }
+    /*Finish filling in the reference frame borders.*/
+    for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
+    /*Update the reference frame indices.*/
+    if(_dec->state.frame_type==OC_INTRA_FRAME){
+      /*The new frame becomes both the previous and gold reference frames.*/
+      _dec->state.ref_frame_idx[OC_FRAME_GOLD]=
+       _dec->state.ref_frame_idx[OC_FRAME_PREV]=
+       _dec->state.ref_frame_idx[OC_FRAME_SELF];
+    }
+    else{
+      /*Otherwise, just replace the previous reference frame.*/
+      _dec->state.ref_frame_idx[OC_FRAME_PREV]=
+       _dec->state.ref_frame_idx[OC_FRAME_SELF];
+    }
+    /*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG
+       gamma values, if nothing else).*/
+    oc_restore_fpu(&_dec->state);
+#if defined(OC_DUMP_IMAGES)
+    /*Don't dump images for dropped frames.*/
+    oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
+#endif
+    return 0;
+  }
+  else{
+    if(_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
+     _dec->state.ref_frame_idx[OC_FRAME_PREV]<0){
+      int refi;
+      /*No reference frames yet!
+        Create the gray dummy frame and point the output buffer at it.*/
+      oc_dec_init_dummy_frame(_dec);
+      refi=_dec->state.ref_frame_idx[OC_FRAME_PREV];
+      _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
+      memcpy(_dec->pp_frame_buf,_dec->state.ref_frame_bufs[refi],
+       sizeof(_dec->pp_frame_buf[0])*3);
+    }
+    /*Just update the granule position and return.*/
+    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
+     _dec->state.info.keyframe_granule_shift)
+     +(_dec->state.curframe_num-_dec->state.keyframe_num);
+    _dec->state.curframe_num++;
+    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
+    return TH_DUPFRAME;
+  }
+}
+
+#if defined(HAVE_CAIRO)
+/*Draws the filled macroblock mode marker used by the telemetry overlay: an
+   11x11 box inset 2.5 pixels from (_x,_y), stroked with the given color at
+   50% alpha and then filled with the same color at 25% alpha.
+  _c:          The cairo context to draw into.
+  _x,_y:       The top-left corner of the macroblock in surface coordinates.
+  _r,_g,_b:    The marker color.*/
+static void oc_dec_telemetry_mode_box(cairo_t *_c,double _x,double _y,
+ double _r,double _g,double _b){
+  cairo_set_source_rgba(_c,_r,_g,_b,.5);
+  cairo_rectangle(_c,_x+2.5,_y+2.5,11,11);
+  cairo_stroke_preserve(_c);
+  cairo_set_source_rgba(_c,_r,_g,_b,.25);
+  cairo_fill(_c);
+}
+
+/*Draws a motion vector as a tapered white line: it starts 3 pixels wide at
+   the displaced end (_cx+_mv[0],_cy-_mv[1]) and narrows to 2 and then 1
+   pixel as it approaches the anchor point (_cx,_cy).
+  The line width is left at 1 and the current path is cleared on return.
+  _c:      The cairo context to draw into.
+  _cx,_cy: The anchor point of the vector in surface coordinates.
+  _mv:     The two vector components, read as _mv[0] (x) and _mv[1] (y).*/
+static void oc_dec_telemetry_mv_arrow(cairo_t *_c,double _cx,double _cy,
+ const signed char *_mv){
+  cairo_move_to(_c,_cx+_mv[0],_cy-_mv[1]);
+  cairo_set_source_rgba(_c,1.,1.,1.,.9);
+  cairo_set_line_width(_c,3.);
+  cairo_line_to(_c,_cx+_mv[0]*.66,_cy-_mv[1]*.66);
+  cairo_stroke_preserve(_c);
+  cairo_set_line_width(_c,2.);
+  cairo_line_to(_c,_cx+_mv[0]*.33,_cy-_mv[1]*.33);
+  cairo_stroke_preserve(_c);
+  cairo_set_line_width(_c,1.);
+  cairo_line_to(_c,_cx,_cy);
+  cairo_stroke(_c);
+}
+#endif
+
+/*Fills _ycbcr with a description of the current output frame, taken from the
+   decoder's post-processing frame buffer via oc_ycbcr_buffer_flip().
+  When built with HAVE_CAIRO and _dec->telemetry is set, the frame is first
+   converted to RGB, annotated with the enabled telemetry overlays (macroblock
+   modes, motion vectors, per-block qi changes, bit usage bars and the master
+   qi values), converted back to Y'CbCr into _dec->telemetry_frame_data, and
+   _ycbcr is repointed at that buffer.
+  _dec:   The decoder context.
+  _ycbcr: The buffer description to fill in.
+  Return: TH_EFAULT if _dec or _ycbcr is NULL, and 0 otherwise.
+          If the telemetry buffer or the cairo surface cannot be allocated,
+           the frame is returned without any overlay and 0 is still
+           returned.*/
+int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){
+  if(_dec==NULL||_ycbcr==NULL)return TH_EFAULT;
+  oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);
+#if defined(HAVE_CAIRO)
+  /*If telemetry ioctls are active, we need to draw to the output buffer.
+    Stuff the plane into cairo.*/
+  if(_dec->telemetry){
+    cairo_surface_t *cs;
+    unsigned char   *data;
+    unsigned char   *y_row;
+    unsigned char   *u_row;
+    unsigned char   *v_row;
+    unsigned char   *rgb_row;
+    int              cstride;
+    int              w;
+    int              h;
+    int              x;
+    int              y;
+    int              hdec;
+    int              vdec;
+    w=_ycbcr[0].width;
+    h=_ycbcr[0].height;
+    /*hdec and vdec are 1 when bit 0 (resp. bit 1) of the pixel format is
+       clear; they are used below as chroma shift amounts.*/
+    hdec=!(_dec->state.info.pixel_fmt&1);
+    vdec=!(_dec->state.info.pixel_fmt&2);
+    /*Lazy data buffer init.
+      We could try to re-use the post-processing buffer, which would save
+       memory, but complicate the allocation logic there.
+      I don't think anyone cares about memory usage when using telemetry; it is
+       not meant for embedded devices.*/
+    if(_dec->telemetry_frame_data==NULL){
+      _dec->telemetry_frame_data=_ogg_malloc(
+       (w*h+2*(w>>hdec)*(h>>vdec))*sizeof(*_dec->telemetry_frame_data));
+      if(_dec->telemetry_frame_data==NULL)return 0;
+    }
+    cs=cairo_image_surface_create(CAIRO_FORMAT_RGB24,w,h);
+    /*Sadly, no YUV support in Cairo (yet); convert into the RGB buffer.*/
+    data=cairo_image_surface_get_data(cs);
+    if(data==NULL){
+      cairo_surface_destroy(cs);
+      return 0;
+    }
+    cstride=cairo_image_surface_get_stride(cs);
+    y_row=_ycbcr[0].data;
+    u_row=_ycbcr[1].data;
+    v_row=_ycbcr[2].data;
+    rgb_row=data;
+    for(y=0;y<h;y++){
+      for(x=0;x<w;x++){
+        int r;
+        int g;
+        int b;
+        r=(1904000*y_row[x]+2609823*v_row[x>>hdec]-363703744)/1635200;
+        g=(3827562*y_row[x]-1287801*u_row[x>>hdec]
+         -2672387*v_row[x>>hdec]+447306710)/3287200;
+        b=(952000*y_row[x]+1649289*u_row[x>>hdec]-225932192)/817600;
+        rgb_row[4*x+0]=OC_CLAMP255(b);
+        rgb_row[4*x+1]=OC_CLAMP255(g);
+        rgb_row[4*x+2]=OC_CLAMP255(r);
+      }
+      y_row+=_ycbcr[0].stride;
+      /*The mask is all ones when the chroma row should advance (every row
+         without vertical decimation, every odd row with it) and zero
+         otherwise.*/
+      u_row+=_ycbcr[1].stride&-((y&1)|!vdec);
+      v_row+=_ycbcr[2].stride&-((y&1)|!vdec);
+      rgb_row+=cstride;
+    }
+    /*We wrote the pixels behind cairo's back; tell it so before drawing.*/
+    cairo_surface_mark_dirty(cs);
+    /*Draw coded identifier for each macroblock (stored in Hilbert order).*/
+    {
+      cairo_t           *c;
+      const oc_fragment *frags;
+      oc_mv             *frag_mvs;
+      const signed char *mb_modes;
+      oc_mb_map         *mb_maps;
+      size_t             nmbs;
+      size_t             mbi;
+      int                row2;
+      int                col2;
+      /*Maps each qii to a symbol: +-1 or +-2 according to how its qi
+         compares with the base qi (qis[0]), or 0 for no symbol.*/
+      int                qim[3]={0,0,0};
+      if(_dec->state.nqis==2){
+        int bqi;
+        bqi=_dec->state.qis[0];
+        if(_dec->state.qis[1]>bqi)qim[1]=1;
+        if(_dec->state.qis[1]<bqi)qim[1]=-1;
+      }
+      if(_dec->state.nqis==3){
+        int bqi;
+        int cqi;
+        int dqi;
+        bqi=_dec->state.qis[0];
+        cqi=_dec->state.qis[1];
+        dqi=_dec->state.qis[2];
+        if(cqi>bqi&&dqi>bqi){
+          if(dqi>cqi){
+            qim[1]=1;
+            qim[2]=2;
+          }
+          else{
+            qim[1]=2;
+            qim[2]=1;
+          }
+        }
+        else if(cqi<bqi&&dqi<bqi){
+          if(dqi<cqi){
+            qim[1]=-1;
+            qim[2]=-2;
+          }
+          else{
+            qim[1]=-2;
+            qim[2]=-1;
+          }
+        }
+        else{
+          if(cqi<bqi)qim[1]=-1;
+          else qim[1]=1;
+          if(dqi<bqi)qim[2]=-1;
+          else qim[2]=1;
+        }
+      }
+      c=cairo_create(cs);
+      frags=_dec->state.frags;
+      frag_mvs=_dec->state.frag_mvs;
+      mb_modes=_dec->state.mb_modes;
+      mb_maps=_dec->state.mb_maps;
+      nmbs=_dec->state.nmbs;
+      row2=0;
+      col2=0;
+      for(mbi=0;mbi<nmbs;mbi++){
+        float mb_x;
+        float mb_y;
+        int   bi;
+        /*Top-left corner of this macroblock on the surface.*/
+        mb_y=h-(row2+(((col2+1)>>1)&1))*16-16;
+        mb_x=(col2>>1)*16;
+        cairo_set_line_width(c,1.);
+        /*Keyframe (all intra) red box.*/
+        if(_dec->state.frame_type==OC_INTRA_FRAME){
+          if(_dec->telemetry_mbmode&0x02){
+            oc_dec_telemetry_mode_box(c,mb_x,mb_y,1.,0,0);
+          }
+        }
+        else{
+          const signed char *frag_mv;
+          ptrdiff_t          fragi;
+          frag_mv=NULL;
+          /*Use the motion vector of the first coded luma block; if there is
+             none, nothing is drawn for this macroblock's mode.*/
+          for(bi=0;bi<4;bi++){
+            fragi=mb_maps[mbi][0][bi];
+            if(fragi>=0&&frags[fragi].coded){
+              frag_mv=frag_mvs[fragi];
+              break;
+            }
+          }
+          if(bi<4){
+            switch(mb_modes[mbi]){
+              case OC_MODE_INTRA:{
+                if(_dec->telemetry_mbmode&0x02){
+                  oc_dec_telemetry_mode_box(c,mb_x,mb_y,1.,0,0);
+                }
+              }break;
+              case OC_MODE_INTER_NOMV:{
+                if(_dec->telemetry_mbmode&0x01){
+                  oc_dec_telemetry_mode_box(c,mb_x,mb_y,0,0,1.);
+                }
+              }break;
+              case OC_MODE_INTER_MV:{
+                if(_dec->telemetry_mbmode&0x04){
+                  cairo_rectangle(c,mb_x+2.5,mb_y+2.5,11,11);
+                  cairo_set_source_rgba(c,0,1.,0,.5);
+                  cairo_stroke(c);
+                }
+                if(_dec->telemetry_mv&0x04){
+                  oc_dec_telemetry_mv_arrow(c,mb_x+8,mb_y+8,frag_mv);
+                }
+              }break;
+              case OC_MODE_INTER_MV_LAST:{
+                if(_dec->telemetry_mbmode&0x08){
+                  cairo_rectangle(c,mb_x+2.5,mb_y+2.5,11,11);
+                  cairo_set_source_rgba(c,0,1.,0,.5);
+                  cairo_move_to(c,mb_x+13.5,mb_y+2.5);
+                  cairo_line_to(c,mb_x+2.5,mb_y+8);
+                  cairo_line_to(c,mb_x+13.5,mb_y+13.5);
+                  cairo_stroke(c);
+                }
+                if(_dec->telemetry_mv&0x08){
+                  oc_dec_telemetry_mv_arrow(c,mb_x+8,mb_y+8,frag_mv);
+                }
+              }break;
+              case OC_MODE_INTER_MV_LAST2:{
+                if(_dec->telemetry_mbmode&0x10){
+                  cairo_rectangle(c,mb_x+2.5,mb_y+2.5,11,11);
+                  cairo_set_source_rgba(c,0,1.,0,.5);
+                  cairo_move_to(c,mb_x+8,mb_y+2.5);
+                  cairo_line_to(c,mb_x+2.5,mb_y+8);
+                  cairo_line_to(c,mb_x+8,mb_y+13.5);
+                  cairo_move_to(c,mb_x+13.5,mb_y+2.5);
+                  cairo_line_to(c,mb_x+8,mb_y+8);
+                  cairo_line_to(c,mb_x+13.5,mb_y+13.5);
+                  cairo_stroke(c);
+                }
+                if(_dec->telemetry_mv&0x10){
+                  oc_dec_telemetry_mv_arrow(c,mb_x+8,mb_y+8,frag_mv);
+                }
+              }break;
+              case OC_MODE_GOLDEN_NOMV:{
+                if(_dec->telemetry_mbmode&0x20){
+                  oc_dec_telemetry_mode_box(c,mb_x,mb_y,1.,1.,0);
+                }
+              }break;
+              case OC_MODE_GOLDEN_MV:{
+                if(_dec->telemetry_mbmode&0x40){
+                  cairo_rectangle(c,mb_x+2.5,mb_y+2.5,11,11);
+                  cairo_set_source_rgba(c,1.,1.,0,.5);
+                  cairo_stroke(c);
+                }
+                if(_dec->telemetry_mv&0x40){
+                  oc_dec_telemetry_mv_arrow(c,mb_x+8,mb_y+8,frag_mv);
+                }
+              }break;
+              case OC_MODE_INTER_MV_FOUR:{
+                if(_dec->telemetry_mbmode&0x80){
+                  cairo_rectangle(c,mb_x+2.5,mb_y+2.5,4,4);
+                  cairo_rectangle(c,mb_x+9.5,mb_y+2.5,4,4);
+                  cairo_rectangle(c,mb_x+2.5,mb_y+9.5,4,4);
+                  cairo_rectangle(c,mb_x+9.5,mb_y+9.5,4,4);
+                  cairo_set_source_rgba(c,0,1.,0,.5);
+                  cairo_stroke(c);
+                }
+                /*4mv is odd, coded in raster order.
+                  Each coded block gets its own vector, anchored at the
+                   center of its 8x8 quadrant: blocks 0 and 1 use y+12,
+                   blocks 2 and 3 use y+4.*/
+                if(_dec->telemetry_mv&0x80){
+                  int bj;
+                  for(bj=0;bj<4;bj++){
+                    fragi=mb_maps[mbi][0][bj];
+                    if(fragi>=0&&frags[fragi].coded){
+                      oc_dec_telemetry_mv_arrow(c,mb_x+4+(bj&1)*8,
+                       mb_y+12-(bj&2)*4,frag_mvs[fragi]);
+                    }
+                  }
+                }
+              }break;
+              default:break;
+            }
+          }
+        }
+        /*qii illustration.*/
+        if(_dec->telemetry_qi&0x2){
+          cairo_set_line_cap(c,CAIRO_LINE_CAP_SQUARE);
+          for(bi=0;bi<4;bi++){
+            ptrdiff_t fragi;
+            int       qiv;
+            int       xp;
+            int       yp;
+            xp=mb_x+(bi&1)*8;
+            yp=mb_y+8-(bi&2)*4;
+            fragi=mb_maps[mbi][0][bi];
+            if(fragi>=0&&frags[fragi].coded){
+              qiv=qim[frags[fragi].qii];
+              /*Each symbol is stroked twice: a wide translucent black
+                 outline here, then a 1 pixel colored line after the
+                 switch.*/
+              cairo_set_line_width(c,3.);
+              cairo_set_source_rgba(c,0.,0.,0.,.5);
+              switch(qiv){
+                /*Double plus:*/
+                case 2:{
+                  if((bi&1)^((bi&2)>>1)){
+                    cairo_move_to(c,xp+2.5,yp+1.5);
+                    cairo_line_to(c,xp+2.5,yp+3.5);
+                    cairo_move_to(c,xp+1.5,yp+2.5);
+                    cairo_line_to(c,xp+3.5,yp+2.5);
+                    cairo_move_to(c,xp+5.5,yp+4.5);
+                    cairo_line_to(c,xp+5.5,yp+6.5);
+                    cairo_move_to(c,xp+4.5,yp+5.5);
+                    cairo_line_to(c,xp+6.5,yp+5.5);
+                    cairo_stroke_preserve(c);
+                    cairo_set_source_rgba(c,0.,1.,1.,1.);
+                  }
+                  else{
+                    cairo_move_to(c,xp+5.5,yp+1.5);
+                    cairo_line_to(c,xp+5.5,yp+3.5);
+                    cairo_move_to(c,xp+4.5,yp+2.5);
+                    cairo_line_to(c,xp+6.5,yp+2.5);
+                    cairo_move_to(c,xp+2.5,yp+4.5);
+                    cairo_line_to(c,xp+2.5,yp+6.5);
+                    cairo_move_to(c,xp+1.5,yp+5.5);
+                    cairo_line_to(c,xp+3.5,yp+5.5);
+                    cairo_stroke_preserve(c);
+                    cairo_set_source_rgba(c,0.,1.,1.,1.);
+                  }
+                }break;
+                /*Double minus:*/
+                case -2:{
+                  cairo_move_to(c,xp+2.5,yp+2.5);
+                  cairo_line_to(c,xp+5.5,yp+2.5);
+                  cairo_move_to(c,xp+2.5,yp+5.5);
+                  cairo_line_to(c,xp+5.5,yp+5.5);
+                  cairo_stroke_preserve(c);
+                  cairo_set_source_rgba(c,1.,1.,1.,1.);
+                }break;
+                /*Plus:*/
+                case 1:{
+                  /*The parentheses matter: == binds tighter than &, so
+                     "bi&2==0" would always be zero.*/
+                  if((bi&2)==0)yp-=2;
+                  if((bi&1)==0)xp-=2;
+                  cairo_move_to(c,xp+4.5,yp+2.5);
+                  cairo_line_to(c,xp+4.5,yp+6.5);
+                  cairo_move_to(c,xp+2.5,yp+4.5);
+                  cairo_line_to(c,xp+6.5,yp+4.5);
+                  cairo_stroke_preserve(c);
+                  cairo_set_source_rgba(c,.1,1.,.3,1.);
+                }break;
+                /*Minus:*/
+                case -1:{
+                  cairo_move_to(c,xp+2.5,yp+4.5);
+                  cairo_line_to(c,xp+6.5,yp+4.5);
+                  cairo_stroke_preserve(c);
+                  cairo_set_source_rgba(c,1.,.3,.1,1.);
+                }break;
+                /*No change from the base qi: draw nothing for this block.*/
+                default:continue;
+              }
+              cairo_set_line_width(c,1.);
+              cairo_stroke(c);
+            }
+          }
+        }
+        /*Advance to the next macroblock position.*/
+        col2++;
+        if((col2>>1)>=_dec->state.nhmbs){
+          col2=0;
+          row2+=2;
+        }
+      }
+      /*Bit usage indicator[s]:*/
+      if(_dec->telemetry_bits){
+        int widths[6];
+        int fpsn;
+        int fpsd;
+        int mult;
+        int fullw;
+        int padw;
+        int i;
+        fpsn=_dec->state.info.fps_numerator;
+        fpsd=_dec->state.info.fps_denominator;
+        mult=(_dec->telemetry_bits>=0xFF?1:_dec->telemetry_bits);
+        fullw=250.f*h*fpsd*mult/fpsn;
+        /*fullw is the divisor for every bar below; do not let a scale that
+           truncated to zero turn into a division by zero.*/
+        if(fullw==0)fullw=1;
+        padw=w-24;
+        /*Header and coded block bits.
+          Any counter that is negative or inconsistent with the frame total
+           is reset to zero before it is used.*/
+        if(_dec->telemetry_frame_bytes<0||
+         _dec->telemetry_frame_bytes==OC_LOTS_OF_BITS){
+          _dec->telemetry_frame_bytes=0;
+        }
+        if(_dec->telemetry_coding_bytes<0||
+         _dec->telemetry_coding_bytes>_dec->telemetry_frame_bytes){
+          _dec->telemetry_coding_bytes=0;
+        }
+        if(_dec->telemetry_mode_bytes<0||
+         _dec->telemetry_mode_bytes>_dec->telemetry_frame_bytes){
+          _dec->telemetry_mode_bytes=0;
+        }
+        if(_dec->telemetry_mv_bytes<0||
+         _dec->telemetry_mv_bytes>_dec->telemetry_frame_bytes){
+          _dec->telemetry_mv_bytes=0;
+        }
+        if(_dec->telemetry_qi_bytes<0||
+         _dec->telemetry_qi_bytes>_dec->telemetry_frame_bytes){
+          _dec->telemetry_qi_bytes=0;
+        }
+        if(_dec->telemetry_dc_bytes<0||
+         _dec->telemetry_dc_bytes>_dec->telemetry_frame_bytes){
+          _dec->telemetry_dc_bytes=0;
+        }
+        /*Each bar shows the difference between two consecutive counters.*/
+        widths[0]=padw*
+         (_dec->telemetry_frame_bytes-_dec->telemetry_coding_bytes)/fullw;
+        widths[1]=padw*
+         (_dec->telemetry_coding_bytes-_dec->telemetry_mode_bytes)/fullw;
+        widths[2]=padw*
+         (_dec->telemetry_mode_bytes-_dec->telemetry_mv_bytes)/fullw;
+        widths[3]=padw*
+         (_dec->telemetry_mv_bytes-_dec->telemetry_qi_bytes)/fullw;
+        widths[4]=padw*
+         (_dec->telemetry_qi_bytes-_dec->telemetry_dc_bytes)/fullw;
+        widths[5]=padw*(_dec->telemetry_dc_bytes)/fullw;
+        for(i=0;i<6;i++)if(widths[i]>w)widths[i]=w;
+        /*Translucent black backing for all six bars.*/
+        cairo_set_source_rgba(c,.0,.0,.0,.6);
+        cairo_rectangle(c,10,h-33,widths[0]+1,5);
+        cairo_rectangle(c,10,h-29,widths[1]+1,5);
+        cairo_rectangle(c,10,h-25,widths[2]+1,5);
+        cairo_rectangle(c,10,h-21,widths[3]+1,5);
+        cairo_rectangle(c,10,h-17,widths[4]+1,5);
+        cairo_rectangle(c,10,h-13,widths[5]+1,5);
+        cairo_fill(c);
+        cairo_set_source_rgb(c,1,0,0);
+        cairo_rectangle(c,10.5,h-32.5,widths[0],4);
+        cairo_fill(c);
+        cairo_set_source_rgb(c,0,1,0);
+        cairo_rectangle(c,10.5,h-28.5,widths[1],4);
+        cairo_fill(c);
+        cairo_set_source_rgb(c,0,0,1);
+        cairo_rectangle(c,10.5,h-24.5,widths[2],4);
+        cairo_fill(c);
+        cairo_set_source_rgb(c,.6,.4,.0);
+        cairo_rectangle(c,10.5,h-20.5,widths[3],4);
+        cairo_fill(c);
+        cairo_set_source_rgb(c,.3,.3,.3);
+        cairo_rectangle(c,10.5,h-16.5,widths[4],4);
+        cairo_fill(c);
+        cairo_set_source_rgb(c,.5,.5,.8);
+        cairo_rectangle(c,10.5,h-12.5,widths[5],4);
+        cairo_fill(c);
+      }
+      /*Master qi indicator[s]:*/
+      if(_dec->telemetry_qi&0x1){
+        cairo_text_extents_t extents;
+        char                 buffer[10];
+        int                  p;
+        int                  text_y;
+        p=0;
+        text_y=h-7.5;
+        /*Format up to three qi values as one or two decimal digits each,
+           separated by spaces.*/
+        if(_dec->state.qis[0]>=10)buffer[p++]=48+_dec->state.qis[0]/10;
+        buffer[p++]=48+_dec->state.qis[0]%10;
+        if(_dec->state.nqis>=2){
+          buffer[p++]=' ';
+          if(_dec->state.qis[1]>=10)buffer[p++]=48+_dec->state.qis[1]/10;
+          buffer[p++]=48+_dec->state.qis[1]%10;
+        }
+        if(_dec->state.nqis==3){
+          buffer[p++]=' ';
+          if(_dec->state.qis[2]>=10)buffer[p++]=48+_dec->state.qis[2]/10;
+          buffer[p++]=48+_dec->state.qis[2]%10;
+        }
+        buffer[p++]='\0';
+        cairo_select_font_face(c,"sans",
+         CAIRO_FONT_SLANT_NORMAL,CAIRO_FONT_WEIGHT_BOLD);
+        cairo_set_font_size(c,18);
+        cairo_text_extents(c,buffer,&extents);
+        /*White text with a thin black outline, right-aligned.*/
+        cairo_set_source_rgb(c,1,1,1);
+        cairo_move_to(c,w-extents.x_advance-10,text_y);
+        cairo_show_text(c,buffer);
+        cairo_set_source_rgb(c,0,0,0);
+        cairo_move_to(c,w-extents.x_advance-10,text_y);
+        cairo_text_path(c,buffer);
+        cairo_set_line_width(c,.8);
+        cairo_set_line_join(c,CAIRO_LINE_JOIN_ROUND);
+        cairo_stroke(c);
+      }
+      cairo_destroy(c);
+    }
+    /*Make sure all drawing has landed in the pixel data before we read it.*/
+    cairo_surface_flush(cs);
+    /*Out of the Cairo plane into the telemetry YUV buffer.*/
+    _ycbcr[0].data=_dec->telemetry_frame_data;
+    _ycbcr[0].stride=_ycbcr[0].width;
+    _ycbcr[1].data=_ycbcr[0].data+h*_ycbcr[0].stride;
+    _ycbcr[1].stride=_ycbcr[1].width;
+    _ycbcr[2].data=_ycbcr[1].data+(h>>vdec)*_ycbcr[1].stride;
+    _ycbcr[2].stride=_ycbcr[2].width;
+    y_row=_ycbcr[0].data;
+    u_row=_ycbcr[1].data;
+    v_row=_ycbcr[2].data;
+    rgb_row=data;
+    /*This is one of the few places it's worth handling chroma on a
+       case-by-case basis.*/
+    switch(_dec->state.info.pixel_fmt){
+      case TH_PF_420:{
+        /*Two rows and two columns at a time: four luma samples and one
+           chroma pair computed from the sum of the 2x2 RGB pixels.
+          This reads pixel x+1 and row y+1, so it presumes w and h are
+           even.*/
+        for(y=0;y<h;y+=2){
+          unsigned char *y_row2;
+          unsigned char *rgb_row2;
+          y_row2=y_row+_ycbcr[0].stride;
+          rgb_row2=rgb_row+cstride;
+          for(x=0;x<w;x+=2){
+            int yval;
+            int uval;
+            int vval;
+            yval=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
+             +24966*rgb_row[4*x+0]+4207500)/255000;
+            y_row[x]=OC_CLAMP255(yval);
+            yval=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
+             +24966*rgb_row[4*x+4]+4207500)/255000;
+            y_row[x+1]=OC_CLAMP255(yval);
+            yval=(65481*rgb_row2[4*x+2]+128553*rgb_row2[4*x+1]
+             +24966*rgb_row2[4*x+0]+4207500)/255000;
+            y_row2[x]=OC_CLAMP255(yval);
+            yval=(65481*rgb_row2[4*x+6]+128553*rgb_row2[4*x+5]
+             +24966*rgb_row2[4*x+4]+4207500)/255000;
+            y_row2[x+1]=OC_CLAMP255(yval);
+            uval=(-8372*(rgb_row[4*x+2]+rgb_row[4*x+6]
+             +rgb_row2[4*x+2]+rgb_row2[4*x+6])
+             -16436*(rgb_row[4*x+1]+rgb_row[4*x+5]
+             +rgb_row2[4*x+1]+rgb_row2[4*x+5])
+             +24808*(rgb_row[4*x+0]+rgb_row[4*x+4]
+             +rgb_row2[4*x+0]+rgb_row2[4*x+4])+29032005)/225930;
+            vval=(39256*(rgb_row[4*x+2]+rgb_row[4*x+6]
+             +rgb_row2[4*x+2]+rgb_row2[4*x+6])
+             -32872*(rgb_row[4*x+1]+rgb_row[4*x+5]
+              +rgb_row2[4*x+1]+rgb_row2[4*x+5])
+             -6384*(rgb_row[4*x+0]+rgb_row[4*x+4]
+              +rgb_row2[4*x+0]+rgb_row2[4*x+4])+45940035)/357510;
+            u_row[x>>1]=OC_CLAMP255(uval);
+            v_row[x>>1]=OC_CLAMP255(vval);
+          }
+          y_row+=_ycbcr[0].stride<<1;
+          u_row+=_ycbcr[1].stride;
+          v_row+=_ycbcr[2].stride;
+          rgb_row+=cstride<<1;
+        }
+      }break;
+      case TH_PF_422:{
+        /*Two columns at a time: two luma samples and one chroma pair
+           computed from the sum of the two RGB pixels.
+          This reads pixel x+1, so it presumes w is even.*/
+        for(y=0;y<h;y++){
+          for(x=0;x<w;x+=2){
+            int yval;
+            int uval;
+            int vval;
+            yval=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
+             +24966*rgb_row[4*x+0]+4207500)/255000;
+            y_row[x]=OC_CLAMP255(yval);
+            yval=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
+             +24966*rgb_row[4*x+4]+4207500)/255000;
+            y_row[x+1]=OC_CLAMP255(yval);
+            uval=(-16744*(rgb_row[4*x+2]+rgb_row[4*x+6])
+             -32872*(rgb_row[4*x+1]+rgb_row[4*x+5])
+             +49616*(rgb_row[4*x+0]+rgb_row[4*x+4])+29032005)/225930;
+            vval=(78512*(rgb_row[4*x+2]+rgb_row[4*x+6])
+             -65744*(rgb_row[4*x+1]+rgb_row[4*x+5])
+             -12768*(rgb_row[4*x+0]+rgb_row[4*x+4])+45940035)/357510;
+            u_row[x>>1]=OC_CLAMP255(uval);
+            v_row[x>>1]=OC_CLAMP255(vval);
+          }
+          y_row+=_ycbcr[0].stride;
+          u_row+=_ycbcr[1].stride;
+          v_row+=_ycbcr[2].stride;
+          rgb_row+=cstride;
+        }
+      }break;
+      /*case TH_PF_444:*/
+      default:{
+        /*One luma sample and one chroma pair per RGB pixel.*/
+        for(y=0;y<h;y++){
+          for(x=0;x<w;x++){
+            int yval;
+            int uval;
+            int vval;
+            yval=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
+             +24966*rgb_row[4*x+0]+4207500)/255000;
+            uval=(-33488*rgb_row[4*x+2]-65744*rgb_row[4*x+1]
+             +99232*rgb_row[4*x+0]+29032005)/225930;
+            vval=(157024*rgb_row[4*x+2]-131488*rgb_row[4*x+1]
+             -25536*rgb_row[4*x+0]+45940035)/357510;
+            y_row[x]=OC_CLAMP255(yval);
+            u_row[x]=OC_CLAMP255(uval);
+            v_row[x]=OC_CLAMP255(vval);
+          }
+          y_row+=_ycbcr[0].stride;
+          u_row+=_ycbcr[1].stride;
+          v_row+=_ycbcr[2].stride;
+          rgb_row+=cstride;
+        }
+      }break;
+    }
+    /*Finished.
+      Destroy the surface.*/
+    cairo_surface_destroy(cs);
+  }
+#endif
+  return 0;
+}

+ 29 - 17
Engine/lib/libtheora/lib/dec/dequant.c → Engine/lib/libtheora/lib/dequant.c

@@ -5,13 +5,13 @@
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  *                                                                  *
  *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  *                                                                  *
  *                                                                  *
  ********************************************************************
  ********************************************************************
 
 
   function:
   function:
-    last mod: $Id: dequant.c 15400 2008-10-15 12:10:58Z tterribe $
+    last mod: $Id: dequant.c 16503 2009-08-22 18:14:02Z giles $
 
 
  ********************************************************************/
  ********************************************************************/
 
 
@@ -21,8 +21,7 @@
 #include "dequant.h"
 #include "dequant.h"
 #include "decint.h"
 #include "decint.h"
 
 
-int oc_quant_params_unpack(oggpack_buffer *_opb,
- th_quant_info *_qinfo){
+int oc_quant_params_unpack(oc_pack_buf *_opb,th_quant_info *_qinfo){
   th_quant_base *base_mats;
   th_quant_base *base_mats;
   long           val;
   long           val;
   int            nbase_mats;
   int            nbase_mats;
@@ -36,30 +35,31 @@ int oc_quant_params_unpack(oggpack_buffer *_opb,
   int            qri;
   int            qri;
   int            qi;
   int            qi;
   int            i;
   int            i;
-  theorapackB_read(_opb,3,&val);
+  val=oc_pack_read(_opb,3);
   nbits=(int)val;
   nbits=(int)val;
   for(qi=0;qi<64;qi++){
   for(qi=0;qi<64;qi++){
-    theorapackB_read(_opb,nbits,&val);
+    val=oc_pack_read(_opb,nbits);
     _qinfo->loop_filter_limits[qi]=(unsigned char)val;
     _qinfo->loop_filter_limits[qi]=(unsigned char)val;
   }
   }
-  theorapackB_read(_opb,4,&val);
+  val=oc_pack_read(_opb,4);
   nbits=(int)val+1;
   nbits=(int)val+1;
   for(qi=0;qi<64;qi++){
   for(qi=0;qi<64;qi++){
-    theorapackB_read(_opb,nbits,&val);
+    val=oc_pack_read(_opb,nbits);
     _qinfo->ac_scale[qi]=(ogg_uint16_t)val;
     _qinfo->ac_scale[qi]=(ogg_uint16_t)val;
   }
   }
-  theorapackB_read(_opb,4,&val);
+  val=oc_pack_read(_opb,4);
   nbits=(int)val+1;
   nbits=(int)val+1;
   for(qi=0;qi<64;qi++){
   for(qi=0;qi<64;qi++){
-    theorapackB_read(_opb,nbits,&val);
+    val=oc_pack_read(_opb,nbits);
     _qinfo->dc_scale[qi]=(ogg_uint16_t)val;
     _qinfo->dc_scale[qi]=(ogg_uint16_t)val;
   }
   }
-  theorapackB_read(_opb,9,&val);
+  val=oc_pack_read(_opb,9);
   nbase_mats=(int)val+1;
   nbase_mats=(int)val+1;
   base_mats=_ogg_malloc(nbase_mats*sizeof(base_mats[0]));
   base_mats=_ogg_malloc(nbase_mats*sizeof(base_mats[0]));
+  if(base_mats==NULL)return TH_EFAULT;
   for(bmi=0;bmi<nbase_mats;bmi++){
   for(bmi=0;bmi<nbase_mats;bmi++){
     for(ci=0;ci<64;ci++){
     for(ci=0;ci<64;ci++){
-      theorapackB_read(_opb,8,&val);
+      val=oc_pack_read(_opb,8);
       base_mats[bmi][ci]=(unsigned char)val;
       base_mats[bmi][ci]=(unsigned char)val;
     }
     }
   }
   }
@@ -72,12 +72,12 @@ int oc_quant_params_unpack(oggpack_buffer *_opb,
     pli=i%3;
     pli=i%3;
     qranges=_qinfo->qi_ranges[qti]+pli;
     qranges=_qinfo->qi_ranges[qti]+pli;
     if(i>0){
     if(i>0){
-      theorapackB_read1(_opb,&val);
+      val=oc_pack_read1(_opb);
       if(!val){
       if(!val){
         int qtj;
         int qtj;
         int plj;
         int plj;
         if(qti>0){
         if(qti>0){
-          theorapackB_read1(_opb,&val);
+          val=oc_pack_read1(_opb);
           if(val){
           if(val){
             qtj=qti-1;
             qtj=qti-1;
             plj=pli;
             plj=pli;
@@ -95,13 +95,13 @@ int oc_quant_params_unpack(oggpack_buffer *_opb,
         continue;
         continue;
       }
       }
     }
     }
-    theorapackB_read(_opb,nbits,&val);
+    val=oc_pack_read(_opb,nbits);
     indices[0]=(int)val;
     indices[0]=(int)val;
     for(qi=qri=0;qi<63;){
     for(qi=qri=0;qi<63;){
-      theorapackB_read(_opb,oc_ilog(62-qi),&val);
+      val=oc_pack_read(_opb,oc_ilog(62-qi));
       sizes[qri]=(int)val+1;
       sizes[qri]=(int)val+1;
       qi+=(int)val+1;
       qi+=(int)val+1;
-      theorapackB_read(_opb,nbits,&val);
+      val=oc_pack_read(_opb,nbits);
       indices[++qri]=(int)val;
       indices[++qri]=(int)val;
     }
     }
     /*Note: The caller is responsible for cleaning up any partially
     /*Note: The caller is responsible for cleaning up any partially
@@ -112,8 +112,20 @@ int oc_quant_params_unpack(oggpack_buffer *_opb,
     }
     }
     qranges->nranges=qri;
     qranges->nranges=qri;
     qranges->sizes=qrsizes=(int *)_ogg_malloc(qri*sizeof(qrsizes[0]));
     qranges->sizes=qrsizes=(int *)_ogg_malloc(qri*sizeof(qrsizes[0]));
+    if(qranges->sizes==NULL){
+      /*Note: The caller is responsible for cleaning up any partially
+         constructed qinfo.*/
+      _ogg_free(base_mats);
+      return TH_EFAULT;
+    }
     memcpy(qrsizes,sizes,qri*sizeof(qrsizes[0]));
     memcpy(qrsizes,sizes,qri*sizeof(qrsizes[0]));
     qrbms=(th_quant_base *)_ogg_malloc((qri+1)*sizeof(qrbms[0]));
     qrbms=(th_quant_base *)_ogg_malloc((qri+1)*sizeof(qrbms[0]));
+    if(qrbms==NULL){
+      /*Note: The caller is responsible for cleaning up any partially
+         constructed qinfo.*/
+      _ogg_free(base_mats);
+      return TH_EFAULT;
+    }
     qranges->base_matrices=(const th_quant_base *)qrbms;
     qranges->base_matrices=(const th_quant_base *)qrbms;
     do{
     do{
       bmi=indices[qri];
       bmi=indices[qri];

+ 4 - 3
Engine/lib/libtheora/lib/dec/dequant.h → Engine/lib/libtheora/lib/dequant.h

@@ -5,21 +5,22 @@
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  *                                                                  *
  *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  *                                                                  *
  *                                                                  *
  ********************************************************************
  ********************************************************************
 
 
   function:
   function:
-    last mod: $Id: dequant.h 15400 2008-10-15 12:10:58Z tterribe $
+    last mod: $Id: dequant.h 16503 2009-08-22 18:14:02Z giles $
 
 
  ********************************************************************/
  ********************************************************************/
 
 
 #if !defined(_dequant_H)
 #if !defined(_dequant_H)
 # define _dequant_H (1)
 # define _dequant_H (1)
 # include "quant.h"
 # include "quant.h"
+# include "bitpack.h"
 
 
-int oc_quant_params_unpack(oggpack_buffer *_opb,
+int oc_quant_params_unpack(oc_pack_buf *_opb,
  th_quant_info *_qinfo);
  th_quant_info *_qinfo);
 void oc_quant_params_clear(th_quant_info *_qinfo);
 void oc_quant_params_clear(th_quant_info *_qinfo);
 
 

+ 0 - 37
Engine/lib/libtheora/lib/enc/block_inline.h

@@ -1,37 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: block_inline.h 14059 2007-10-28 23:43:27Z xiphmont $
-
- ********************************************************************/
-
-#include "codec_internal.h"
-
-static const ogg_int32_t MBOrderMap[4] = { 0, 2, 3, 1 };
-static const ogg_int32_t BlockOrderMap1[4][4] = {
-  { 0, 1, 3, 2 },
-  { 0, 2, 3, 1 },
-  { 0, 2, 3, 1 },
-  { 3, 2, 0, 1 }
-};
-
-static ogg_int32_t QuadMapToIndex1( ogg_int32_t (*BlockMap)[4][4],
-                                    ogg_uint32_t SB, ogg_uint32_t MB,
-                                    ogg_uint32_t B ){
-  return BlockMap[SB][MBOrderMap[MB]][BlockOrderMap1[MB][B]];
-}
-
-static ogg_int32_t QuadMapToMBTopLeft( ogg_int32_t (*BlockMap)[4][4],
-                                       ogg_uint32_t SB, ogg_uint32_t MB ){
-  return BlockMap[SB][MBOrderMap[MB]][0];
-}

+ 0 - 99
Engine/lib/libtheora/lib/enc/blockmap.c

@@ -1,99 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: blockmap.c 14059 2007-10-28 23:43:27Z xiphmont $
-
- ********************************************************************/
-
-#include "codec_internal.h"
-
-static void CreateMapping ( ogg_int32_t (*BlockMap)[4][4],
-                            ogg_uint32_t FirstSB,
-                            ogg_uint32_t FirstFrag, ogg_uint32_t HFrags,
-                            ogg_uint32_t VFrags ){
-  ogg_uint32_t i, j = 0;
-  ogg_uint32_t xpos;
-  ogg_uint32_t ypos;
-  ogg_uint32_t SBrow, SBcol;
-  ogg_uint32_t SBRows, SBCols;
-  ogg_uint32_t MB, B;
-
-  ogg_uint32_t SB=FirstSB;
-  ogg_uint32_t FragIndex=FirstFrag;
-
-  /* Set Super-Block dimensions */
-  SBRows = VFrags/4 + ( VFrags%4 ? 1 : 0 );
-  SBCols = HFrags/4 + ( HFrags%4 ? 1 : 0 );
-
-  /* Map each Super-Block */
-  for ( SBrow=0; SBrow<SBRows; SBrow++ ){
-    for ( SBcol=0; SBcol<SBCols; SBcol++ ){
-      /* Y co-ordinate of Super-Block in Block units */
-      ypos = SBrow<<2;
-
-      /* Map Blocks within this Super-Block */
-      for ( i=0; (i<4) && (ypos<VFrags); i++, ypos++ ){
-        /* X co-ordinate of Super-Block in Block units */
-        xpos = SBcol<<2;
-
-        for ( j=0; (j<4) && (xpos<HFrags); j++, xpos++ ){
-          if ( i<2 ){
-            MB = ( j<2 ? 0 : 1 );
-          }else{
-            MB = ( j<2 ? 2 : 3 );
-          }
-
-          if ( i%2 ){
-            B = ( j%2 ? 3 : 2 );
-          }else{
-            B = ( j%2 ? 1 : 0 );
-          }
-
-          /* Set mapping and move to next fragment */
-          BlockMap[SB][MB][B] = FragIndex++;
-        }
-
-        /* Move to first fragment in next row in Super-Block */
-        FragIndex += HFrags-j;
-      }
-
-      /* Move on to next Super-Block */
-      SB++;
-      FragIndex -= i*HFrags-j;
-    }
-
-    /* Move to first Super-Block in next row */
-    FragIndex += 3*HFrags;
-  }
-}
-
-void CreateBlockMapping ( ogg_int32_t  (*BlockMap)[4][4],
-                          ogg_uint32_t YSuperBlocks,
-                          ogg_uint32_t UVSuperBlocks,
-                          ogg_uint32_t HFrags, ogg_uint32_t VFrags ) {
-  ogg_uint32_t i, j;
-
-  for ( i=0; i<YSuperBlocks + UVSuperBlocks * 2; i++ ){
-    for ( j=0; j<4; j++ ) {
-      BlockMap[i][j][0] = -1;
-      BlockMap[i][j][1] = -1;
-      BlockMap[i][j][2] = -1;
-      BlockMap[i][j][3] = -1;
-    }
-  }
-
-  CreateMapping ( BlockMap, 0, 0, HFrags, VFrags );
-  CreateMapping ( BlockMap, YSuperBlocks, HFrags*VFrags, HFrags/2, VFrags/2 );
-  CreateMapping ( BlockMap, YSuperBlocks + UVSuperBlocks, (HFrags*VFrags*5)/4,
-                  HFrags/2, VFrags/2 );
-}

+ 0 - 842
Engine/lib/libtheora/lib/enc/codec_internal.h

@@ -1,842 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2005                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: codec_internal.h 15153 2008-08-04 18:37:55Z tterribe $
-
- ********************************************************************/
-
-#ifndef ENCODER_INTERNAL_H
-#define ENCODER_INTERNAL_H
-
-#ifdef HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-typedef struct PB_INSTANCE PB_INSTANCE;
-#include "dsp.h"
-
-#include "theora/theora.h"
-#include "encoder_huffman.h"
-
-#define theora_read(x,y,z) ( *z = oggpackB_read(x,y) )
-
-#define CURRENT_ENCODE_VERSION   1
-#define HUGE_ERROR              (1<<28)  /*  Out of range test value */
-
-/* Baseline dct height and width. */
-#define BLOCK_HEIGHT_WIDTH          8
-#define HFRAGPIXELS                 8
-#define VFRAGPIXELS                 8
-
-/* Blocks on INTRA/INTER Y/U/V planes */
-enum BlockMode {
-  BLOCK_Y,
-  BLOCK_U,
-  BLOCK_V,
-  BLOCK_INTER_Y,
-  BLOCK_INTER_U,
-  BLOCK_INTER_V
-};
-
-/* Baseline dct block size */
-#define BLOCK_SIZE              (BLOCK_HEIGHT_WIDTH * BLOCK_HEIGHT_WIDTH)
-
-/* Border is for unrestricted mv's */
-#define UMV_BORDER              16
-#define STRIDE_EXTRA            (UMV_BORDER * 2)
-
-#define Q_TABLE_SIZE            64
-
-#define KEY_FRAME              0
-#define DELTA_FRAME            1
-
-#define MAX_MODES               8
-#define MODE_BITS               3
-#define MODE_METHODS            8
-#define MODE_METHOD_BITS        3
-
-/* Different key frame types/methods */
-#define DCT_KEY_FRAME           0
-
-#define KEY_FRAME_CONTEXT       5
-
-/* Preprocessor defines */
-#define MAX_PREV_FRAMES        16
-
-/* Number of search sites for a 4-step search (at pixel accuracy) */
-#define MAX_SEARCH_SITES       33
-
-#define VERY_BEST_Q            10
-#define MIN_BPB_FACTOR        0.3
-#define MAX_BPB_FACTOR        3.0
-
-#define MAX_MV_EXTENT 31  /* Max search distance in half pixel increments */
-
-typedef struct CONFIG_TYPE2{
-  double       OutputFrameRate;
-  ogg_uint32_t TargetBandwidth;
-  ogg_uint32_t KeyFrameDataTarget ;  /* Data rate target for key frames */
-
-  ogg_uint32_t FirstFrameQ;
-  ogg_uint32_t BaseQ;
-  ogg_uint32_t MaxQ;            /* Absolute Max Q allowed. */
-  ogg_uint32_t ActiveMaxQ;      /* Currently active Max Q */
-
-} CONFIG_TYPE2;
-
-typedef struct coeffNode{
-  int i;
-  struct coeffNode *next;
-} COEFFNODE;
-
-typedef struct{
-  unsigned char * Yuv0ptr;
-  unsigned char * Yuv1ptr;
-  unsigned char * SrfWorkSpcPtr;
-  unsigned char * disp_fragments;
-
-  ogg_uint32_t  * RegionIndex; /* Gives pixel index for top left of
-                                 each block */
-  ogg_uint32_t    VideoFrameHeight;
-  ogg_uint32_t    VideoFrameWidth;
-
-} SCAN_CONFIG_DATA;
-
-typedef unsigned char YUV_BUFFER_ENTRY;
-
-typedef struct{
-  ogg_int32_t   x;
-  ogg_int32_t   y;
-} MOTION_VECTOR;
-
-typedef MOTION_VECTOR COORDINATE;
-
-/** Quantizer matrix entry */
-typedef ogg_int16_t     Q_LIST_ENTRY;
-
-/** Decode Post-Processor instance */
-typedef struct PP_INSTANCE {
-  ogg_uint32_t  PrevFrameLimit;
-
-  ogg_uint32_t   *ScanPixelIndexTable;
-  signed char    *ScanDisplayFragments;
-
-  signed char    *PrevFragments[MAX_PREV_FRAMES];
-
-  ogg_uint32_t   *FragScores; /* The individual frame difference ratings. */
-  signed char    *SameGreyDirPixels;
-  signed char    *BarBlockMap;
-
-  /* Number of pixels changed by diff threshold in row of a fragment. */
-  unsigned char  *FragDiffPixels;
-
-  unsigned char  *PixelScores;
-  unsigned char  *PixelChangedMap;
-  unsigned char  *ChLocals;
-  ogg_int16_t    *yuv_differences;
-  ogg_int32_t    *RowChangedPixels;
-  signed char    *TmpCodedMap;
-
-  /* Plane pointers and dimension variables */
-  unsigned char  * YPlanePtr0;
-  unsigned char  * YPlanePtr1;
-  unsigned char  * UPlanePtr0;
-  unsigned char  * UPlanePtr1;
-  unsigned char  * VPlanePtr0;
-  unsigned char  * VPlanePtr1;
-
-  ogg_uint32_t    VideoYPlaneWidth;
-  ogg_uint32_t    VideoYPlaneHeight;
-  ogg_uint32_t    VideoUVPlaneWidth;
-  ogg_uint32_t    VideoUVPlaneHeight;
-
-  ogg_uint32_t    VideoYPlaneStride;
-  ogg_uint32_t    VideoUPlaneStride;
-  ogg_uint32_t    VideoVPlaneStride;
-
-  /* Scan control variables. */
-  unsigned char   HFragPixels;
-  unsigned char   VFragPixels;
-
-  ogg_uint32_t    ScanFrameFragments;
-  ogg_uint32_t    ScanYPlaneFragments;
-  ogg_uint32_t    ScanUVPlaneFragments;
-  ogg_uint32_t    ScanHFragments;
-  ogg_uint32_t    ScanVFragments;
-
-  ogg_uint32_t    YFramePixels;
-  ogg_uint32_t    UVFramePixels;
-
-  ogg_uint32_t    SgcThresh;
-
-  ogg_uint32_t    OutputBlocksUpdated;
-  ogg_uint32_t    KFIndicator;
-
-  /* The pre-processor scan configuration. */
-  SCAN_CONFIG_DATA ScanConfig;
-
-  ogg_int32_t   SRFGreyThresh;
-  ogg_int32_t   SRFColThresh;
-  ogg_int32_t   SgcLevelThresh;
-  ogg_int32_t   SuvcLevelThresh;
-
-  ogg_uint32_t  NoiseSupLevel;
-
-  /* Block Thresholds. */
-  ogg_uint32_t  PrimaryBlockThreshold;
-  unsigned char LineSearchTripTresh;
-
-  int   PAKEnabled;
-
-  int   LevelThresh;
-  int   NegLevelThresh;
-  int   SrfThresh;
-  int   NegSrfThresh;
-  int   HighChange;
-  int   NegHighChange;
-
-  /* Threshold lookup tables */
-  unsigned char SrfPakThreshTable[512];
-  unsigned char SrfThreshTable[512];
-  unsigned char SgcThreshTable[512];
-
-  /* Variables controlling S.A.D. break outs. */
-  ogg_uint32_t GrpLowSadThresh;
-  ogg_uint32_t GrpHighSadThresh;
-  ogg_uint32_t ModifiedGrpLowSadThresh;
-  ogg_uint32_t ModifiedGrpHighSadThresh;
-
-  ogg_int32_t  PlaneHFragments;
-  ogg_int32_t  PlaneVFragments;
-  ogg_int32_t  PlaneHeight;
-  ogg_int32_t  PlaneWidth;
-  ogg_int32_t  PlaneStride;
-
-  ogg_uint32_t BlockThreshold;
-  ogg_uint32_t BlockSgcThresh;
-  double UVBlockThreshCorrection;
-  double UVSgcCorrection;
-
-  double YUVPlaneCorrectionFactor;
-  double AbsDiff_ScoreMultiplierTable[256];
-  unsigned char  NoiseScoreBoostTable[256];
-  unsigned char  MaxLineSearchLen;
-
-  ogg_int32_t YuvDiffsCircularBufferSize;
-  ogg_int32_t ChLocalsCircularBufferSize;
-  ogg_int32_t PixelMapCircularBufferSize;
-
-  DspFunctions dsp;  /* Selected functions for this platform */
-
-} PP_INSTANCE;
-
-/** block coding modes */
-typedef enum{
-  CODE_INTER_NO_MV        = 0x0, /* INTER prediction, (0,0) motion
-                                    vector implied.  */
-    CODE_INTRA            = 0x1, /* INTRA i.e. no prediction. */
-    CODE_INTER_PLUS_MV    = 0x2, /* INTER prediction, non zero motion
-                                    vector. */
-    CODE_INTER_LAST_MV    = 0x3, /* Use Last Motion vector */
-    CODE_INTER_PRIOR_LAST = 0x4, /* Prior last motion vector */
-    CODE_USING_GOLDEN     = 0x5, /* 'Golden frame' prediction (no MV). */
-    CODE_GOLDEN_MV        = 0x6, /* 'Golden frame' prediction plus MV. */
-    CODE_INTER_FOURMV     = 0x7  /* Inter prediction 4MV per macro block. */
-} CODING_MODE;
-
-/** Huffman table entry */
-typedef struct HUFF_ENTRY {
-  struct HUFF_ENTRY *ZeroChild;
-  struct HUFF_ENTRY *OneChild;
-  struct HUFF_ENTRY *Previous;
-  struct HUFF_ENTRY *Next;
-  ogg_int32_t        Value;
-  ogg_uint32_t       Frequency;
-
-} HUFF_ENTRY;
-
-typedef struct qmat_range_table {
-  int startq, startqi; /* index where this range starts */
-  Q_LIST_ENTRY *qmat;  /* qmat at this range boundary */
-} qmat_range_table;
-
-/** codec setup data, maps to the third bitstream header */
-typedef struct codec_setup_info {
-  ogg_uint32_t QThreshTable[Q_TABLE_SIZE];
-  Q_LIST_ENTRY DcScaleFactorTable[Q_TABLE_SIZE];
-
-  int MaxQMatrixIndex;
-  Q_LIST_ENTRY *qmats;
-  qmat_range_table *range_table[6];
-
-  HUFF_ENTRY *HuffRoot[NUM_HUFF_TABLES];
-
-} codec_setup_info;
-
-/** Decoder (Playback) instance -- installed in a theora_state */
-struct PB_INSTANCE {
-  oggpack_buffer *opb;
-  theora_info     info;
-
-  /* flag to indicate if the headers already have been written */
-  int            HeadersWritten;
-
-  /* how far do we shift the granulepos to seperate out P frame counts? */
-  int             keyframe_granule_shift;
-
-
-  /***********************************************************************/
-  /* Decoder and Frame Type Information */
-
-  int           DecoderErrorCode;
-  int           FramesHaveBeenSkipped;
-
-  int           PostProcessEnabled;
-  ogg_uint32_t  PostProcessingLevel;    /* Perform post processing */
-
-  /* Frame Info */
-  CODING_MODE   CodingMode;
-  unsigned char FrameType;
-  unsigned char KeyFrameType;
-  ogg_uint32_t  QualitySetting;
-  ogg_uint32_t  FrameQIndex;            /* Quality specified as a
-                                           table index */
-  ogg_uint32_t  ThisFrameQualityValue;  /* Quality value for this frame  */
-  ogg_uint32_t  LastFrameQualityValue;  /* Last Frame's Quality */
-  ogg_int32_t   CodedBlockIndex;        /* Number of Coded Blocks */
-  ogg_uint32_t  CodedBlocksThisFrame;   /* Index into coded blocks */
-  ogg_uint32_t  FrameSize;              /* The number of bytes in the frame. */
-
-  /**********************************************************************/
-  /* Frame Size & Index Information */
-
-  ogg_uint32_t  YPlaneSize;
-  ogg_uint32_t  UVPlaneSize;
-  ogg_uint32_t  YStride;
-  ogg_uint32_t  UVStride;
-  ogg_uint32_t  VFragments;
-  ogg_uint32_t  HFragments;
-  ogg_uint32_t  UnitFragments;
-  ogg_uint32_t  YPlaneFragments;
-  ogg_uint32_t  UVPlaneFragments;
-
-  ogg_uint32_t  ReconYPlaneSize;
-  ogg_uint32_t  ReconUVPlaneSize;
-
-  ogg_uint32_t  YDataOffset;
-  ogg_uint32_t  UDataOffset;
-  ogg_uint32_t  VDataOffset;
-  ogg_uint32_t  ReconYDataOffset;
-  ogg_uint32_t  ReconUDataOffset;
-  ogg_uint32_t  ReconVDataOffset;
-  ogg_uint32_t  YSuperBlocks;   /* Number of SuperBlocks in a Y frame */
-  ogg_uint32_t  UVSuperBlocks;  /* Number of SuperBlocks in a U or V frame */
-  ogg_uint32_t  SuperBlocks;    /* Total number of SuperBlocks in a
-                                   Y,U,V frame */
-
-  ogg_uint32_t  YSBRows;        /* Number of rows of SuperBlocks in a
-                                   Y frame */
-  ogg_uint32_t  YSBCols;        /* Number of cols of SuperBlocks in a
-                                   Y frame */
-  ogg_uint32_t  UVSBRows;       /* Number of rows of SuperBlocks in a
-                                   U or V frame */
-  ogg_uint32_t  UVSBCols;       /* Number of cols of SuperBlocks in a
-                                   U or V frame */
-
-  ogg_uint32_t  MacroBlocks;    /* Total number of Macro-Blocks */
-
-  /**********************************************************************/
-  /* Frames  */
-  YUV_BUFFER_ENTRY *ThisFrameRecon;
-  YUV_BUFFER_ENTRY *GoldenFrame;
-  YUV_BUFFER_ENTRY *LastFrameRecon;
-  YUV_BUFFER_ENTRY *PostProcessBuffer;
-
-  /**********************************************************************/
-  /* Fragment Information */
-  ogg_uint32_t  *pixel_index_table;        /* start address of first
-                                              pixel of fragment in
-                                              source */
-  ogg_uint32_t  *recon_pixel_index_table;  /* start address of first
-                                              pixel in recon buffer */
-
-  unsigned char *display_fragments;        /* Fragment update map */
-  unsigned char *skipped_display_fragments;/* whether fragment YUV
-                                              Conversion and update is to be
-                                              skipped */
-  ogg_int32_t   *CodedBlockList;           /* A list of fragment indices for
-                                              coded blocks. */
-  MOTION_VECTOR *FragMVect;                /* Frag motion vectors */
-
-  ogg_uint32_t  *FragTokenCounts;          /* Number of tokens per fragment */
-  ogg_uint32_t  (*TokenList)[128];         /* Fragment Token Pointers */
-
-  ogg_int32_t   *FragmentVariances;
-  ogg_uint32_t  *FragQIndex;               /* Fragment Quality used in
-                                              PostProcess */
-  Q_LIST_ENTRY (*PPCoefBuffer)[64];        /* PostProcess Buffer for
-                                              coefficients data */
-
-  unsigned char *FragCoeffs;                /* # of coeffs decoded so far for
-                                               fragment */
-  unsigned char *FragCoefEOB;               /* Position of last non 0 coef
-                                                within QFragData */
-  Q_LIST_ENTRY (*QFragData)[64];            /* Fragment Coefficients
-                                               Array Pointers */
-  CODING_MODE   *FragCodingMethod;          /* coding method for the
-                                               fragment */
-
-  /***********************************************************************/
-  /* pointers to addresses used for allocation and deallocation the
-      others are rounded up to the nearest 32 bytes */
-
-  COEFFNODE     *_Nodes;
-  ogg_uint32_t  *transIndex;                    /* ptr to table of
-                                                   transposed indexes */
-
-  /***********************************************************************/
-  ogg_int32_t    bumpLast;
-
-  /* Macro Block and SuperBlock Information */
-  ogg_int32_t  (*BlockMap)[4][4];               /* super block + sub macro
-                                                   block + sub frag ->
-                                                   FragIndex */
-
-  /* Coded flag arrays and counters for them */
-  unsigned char *SBCodedFlags;
-  unsigned char *SBFullyFlags;
-  unsigned char *MBCodedFlags;
-  unsigned char *MBFullyFlags;
-
-  /**********************************************************************/
-  ogg_uint32_t   EOB_Run;
-
-  COORDINATE    *FragCoordinates;
-  MOTION_VECTOR  MVector;
-  ogg_int32_t    ReconPtr2Offset;       /* Offset for second reconstruction
-                                           in half pixel MC */
-  Q_LIST_ENTRY  *quantized_list;
-  ogg_int16_t   *ReconDataBuffer;
-  Q_LIST_ENTRY   InvLastIntraDC;
-  Q_LIST_ENTRY   InvLastInterDC;
-  Q_LIST_ENTRY   LastIntraDC;
-  Q_LIST_ENTRY   LastInterDC;
-
-  ogg_uint32_t   BlocksToDecode;        /* Blocks to be decoded this frame */
-  ogg_uint32_t   DcHuffChoice;          /* Huffman table selection variables */
-  unsigned char  ACHuffChoice;
-  ogg_uint32_t   QuadMBListIndex;
-
-  ogg_int32_t    ByteCount;
-
-  ogg_uint32_t   bit_pattern;
-  unsigned char  bits_so_far;
-  unsigned char  NextBit;
-  ogg_int32_t    BitsLeft;
-
-  ogg_int16_t   *DequantBuffer;
-
-  ogg_int32_t    fp_quant_InterUV_coeffs[64];
-  ogg_int32_t    fp_quant_InterUV_round[64];
-  ogg_int32_t    fp_ZeroBinSize_InterUV[64];
-
-  ogg_int16_t   *TmpReconBuffer;
-  ogg_int16_t   *TmpDataBuffer;
-
-  /* Loop filter bounding values */
-  ogg_int16_t    FiltBoundingValue[256];
-
-  /* Naming convention for all quant matrices and related data structures:
-   * Fields containing "Inter" in their name are for Inter frames, the
-   * rest is Intra. */
-
-  /* Dequantiser and rounding tables */
-  ogg_uint16_t   *QThreshTable;
-  Q_LIST_ENTRY  dequant_Y_coeffs[64];
-  Q_LIST_ENTRY  dequant_U_coeffs[64];
-  Q_LIST_ENTRY  dequant_V_coeffs[64];
-  Q_LIST_ENTRY  dequant_InterY_coeffs[64];
-  Q_LIST_ENTRY  dequant_InterU_coeffs[64];
-  Q_LIST_ENTRY  dequant_InterV_coeffs[64];
-
-  Q_LIST_ENTRY  *dequant_coeffs;        /* currently active quantizer */
-  unsigned int   zigzag_index[64];
-
-  HUFF_ENTRY    *HuffRoot_VP3x[NUM_HUFF_TABLES];
-  ogg_uint32_t  *HuffCodeArray_VP3x[NUM_HUFF_TABLES];
-  unsigned char *HuffCodeLengthArray_VP3x[NUM_HUFF_TABLES];
-  const unsigned char *ExtraBitLengths_VP3x;
-
-  th_quant_info   quant_info;
-  oc_quant_tables quant_tables[2][3];
-
-  /* Quantiser and rounding tables */
-  /* this is scheduled to be replaced a new mechanism
-     that will simply reuse the dequantizer information. */
-  ogg_int32_t    fp_quant_Y_coeffs[64]; /* used in reiniting quantizers */
-  ogg_int32_t    fp_quant_U_coeffs[64];
-  ogg_int32_t    fp_quant_V_coeffs[64];
-  ogg_int32_t    fp_quant_Inter_Y_coeffs[64];
-  ogg_int32_t    fp_quant_Inter_U_coeffs[64];
-  ogg_int32_t    fp_quant_Inter_V_coeffs[64];
-
-  ogg_int32_t    fp_quant_Y_round[64];
-  ogg_int32_t    fp_quant_U_round[64];
-  ogg_int32_t    fp_quant_V_round[64];
-  ogg_int32_t    fp_quant_Inter_Y_round[64];
-  ogg_int32_t    fp_quant_Inter_U_round[64];
-  ogg_int32_t    fp_quant_Inter_V_round[64];
-
-  ogg_int32_t    fp_ZeroBinSize_Y[64];
-  ogg_int32_t    fp_ZeroBinSize_U[64];
-  ogg_int32_t    fp_ZeroBinSize_V[64];
-  ogg_int32_t    fp_ZeroBinSize_Inter_Y[64];
-  ogg_int32_t    fp_ZeroBinSize_Inter_U[64];
-  ogg_int32_t    fp_ZeroBinSize_Inter_V[64];
-
-  ogg_int32_t   *fquant_coeffs;
-  ogg_int32_t   *fquant_round;
-  ogg_int32_t   *fquant_ZbSize;
-
-  /* Predictor used in choosing entropy table for decoding block patterns. */
-  unsigned char  BlockPatternPredictor;
-
-  short          Modifier[4][512];
-  short         *ModifierPointer[4];
-
-  unsigned char *DataOutputInPtr;
-
-  DspFunctions   dsp;  /* Selected functions for this platform */
-
-};
-
-/* Encoder (Compressor) instance -- installed in a theora_state */
-typedef struct CP_INSTANCE {
-  /*This structure must be first.
-    It contains entry points accessed by the decoder library's API wrapper, and
-     is the only assumption that library makes about our internal format.*/
-  oc_state_dispatch_vtbl dispatch_vtbl;
-
-  /* Compressor Configuration */
-  SCAN_CONFIG_DATA ScanConfig;
-  CONFIG_TYPE2     Configuration;
-  int              GoldenFrameEnabled;
-  int              InterPrediction;
-  int              MotionCompensation;
-
-  ogg_uint32_t     LastKeyFrame ;
-  ogg_int32_t      DropCount ;
-  ogg_int32_t      MaxConsDroppedFrames ;
-  ogg_int32_t      DropFrameTriggerBytes;
-  int              DropFrameCandidate;
-
-  /* Compressor Statistics */
-  double           TotErrScore;
-  ogg_int64_t      KeyFrameCount; /* Count of key frames. */
-  ogg_int64_t      TotKeyFrameBytes;
-  ogg_uint32_t     LastKeyFrameSize;
-  ogg_uint32_t     PriorKeyFrameSize[KEY_FRAME_CONTEXT];
-  ogg_uint32_t     PriorKeyFrameDistance[KEY_FRAME_CONTEXT];
-  ogg_int32_t      FrameQuality[6];
-  int              DecoderErrorCode; /* Decoder error flag. */
-  ogg_int32_t      ThreshMapThreshold;
-  ogg_int32_t      TotalMotionScore;
-  ogg_int64_t      TotalByteCount;
-  ogg_int32_t      FixedQ;
-
-  /* Frame Statistics  */
-  signed char      InterCodeCount;
-  ogg_int64_t      CurrentFrame;
-  ogg_int64_t      CarryOver ;
-  ogg_uint32_t     LastFrameSize;
-  ogg_uint32_t     FrameBitCount;
-  int              ThisIsFirstFrame;
-  int              ThisIsKeyFrame;
-
-  ogg_int32_t      MotionScore;
-  ogg_uint32_t     RegulationBlocks;
-  ogg_int32_t      RecoveryMotionScore;
-  int              RecoveryBlocksAdded ;
-  double           ProportionRecBlocks;
-  double           MaxRecFactor ;
-
-  /* Rate Targeting variables. */
-  ogg_uint32_t     ThisFrameTargetBytes;
-  double           BpbCorrectionFactor;
-
-  /* Up regulation variables */
-  ogg_uint32_t     FinalPassLastPos;  /* Used to regulate a final
-                                         unrestricted high quality
-                                         pass. */
-  ogg_uint32_t     LastEndSB;         /* Where we were in the loop
-                                         last time. */
-  ogg_uint32_t     ResidueLastEndSB;  /* Where we were in the residue
-                                         update loop last time. */
-
-  /* Controlling Block Selection */
-  ogg_uint32_t     MVChangeFactor;
-  ogg_uint32_t     FourMvChangeFactor;
-  ogg_uint32_t     MinImprovementForNewMV;
-  ogg_uint32_t     ExhaustiveSearchThresh;
-  ogg_uint32_t     MinImprovementForFourMV;
-  ogg_uint32_t     FourMVThreshold;
-
-  /* Module shared data structures. */
-  ogg_int32_t      frame_target_rate;
-  ogg_int32_t      BaseLineFrameTargetRate;
-  ogg_int32_t      min_blocks_per_frame;
-  ogg_uint32_t     tot_bytes_old;
-
-  /*********************************************************************/
-  /* Frames  Used in the selecetive convolution filtering of the Y plane. */
-  unsigned char    *ConvDestBuffer;
-  YUV_BUFFER_ENTRY *yuv0ptr;
-  YUV_BUFFER_ENTRY *yuv1ptr;
-  /*********************************************************************/
-
-  /*********************************************************************/
-  /* Token Buffers */
-  ogg_uint32_t     *OptimisedTokenListEb; /* Optimised token list extra bits */
-  unsigned char    *OptimisedTokenList;   /* Optimised token list. */
-  unsigned char    *OptimisedTokenListHi; /* Optimised token list huffman
-                                             table index */
-
-  unsigned char    *OptimisedTokenListPl; /* Plane to which the token
-                                             belongs Y = 0 or UV = 1 */
-  ogg_int32_t       OptimisedTokenCount;           /* Count of Optimized tokens */
-  ogg_uint32_t      RunHuffIndex;         /* Huffman table in force at
-                                             the start of a run */
-  ogg_uint32_t      RunPlaneIndex;        /* The plane (Y=0 UV=1) to
-                                             which the first token in
-                                             an EOB run belonged. */
-
-
-  ogg_uint32_t      TotTokenCount;
-  ogg_int32_t       TokensToBeCoded;
-  ogg_int32_t       TokensCoded;
-  /********************************************************************/
-
-  /* SuperBlock, MacroBLock and Fragment Information */
-  /* Coded flag arrays and counters for them */
-  unsigned char    *PartiallyCodedFlags;
-  unsigned char    *PartiallyCodedMbPatterns;
-  unsigned char    *UncodedMbFlags;
-
-  unsigned char    *extra_fragments;   /* extra updates not
-                                          recommended by pre-processor */
-  ogg_int16_t      *OriginalDC;
-
-  ogg_uint32_t     *FragmentLastQ;     /* Array used to keep track of
-                                          quality at which each
-                                          fragment was last
-                                          updated. */
-  unsigned char    *FragTokens;
-  ogg_uint32_t     *FragTokenCounts;   /* Number of tokens per fragment */
-
-  ogg_uint32_t     *RunHuffIndices;
-  ogg_uint32_t     *LastCodedErrorScore;
-  ogg_uint32_t     *ModeList;
-  MOTION_VECTOR    *MVList;
-
-  unsigned char    *BlockCodedFlags;
-
-  ogg_uint32_t      MvListCount;
-  ogg_uint32_t      ModeListCount;
-
-
-  unsigned char    *DataOutputBuffer;
-  /*********************************************************************/
-
-  ogg_uint32_t      RunLength;
-  ogg_uint32_t      MaxBitTarget;     /* Cut off target for rate capping */
-  double            BitRateCapFactor; /* Factor relating delta frame target
-                                         to cut off target. */
-
-  unsigned char     MBCodingMode;     /* Coding mode flags */
-
-  ogg_int32_t       MVPixelOffsetY[MAX_SEARCH_SITES];
-  ogg_uint32_t      InterTripOutThresh;
-  unsigned char     MVEnabled;
-  ogg_uint32_t      MotionVectorSearchCount;
-  ogg_uint32_t      FrameMVSearcOunt;
-  ogg_int32_t       MVSearchSteps;
-  ogg_int32_t       MVOffsetX[MAX_SEARCH_SITES];
-  ogg_int32_t       MVOffsetY[MAX_SEARCH_SITES];
-  ogg_int32_t       HalfPixelRef2Offset[9]; /* Offsets for half pixel
-                                               compensation */
-  signed char       HalfPixelXOffset[9];    /* Half pixel MV offsets for X */
-  signed char       HalfPixelYOffset[9];    /* Half pixel MV offsets for Y */
-
-  ogg_uint32_t      bit_pattern ;
-  unsigned char     bits_so_far ;
-  ogg_uint32_t      lastval ;
-  ogg_uint32_t      lastrun ;
-
-  Q_LIST_ENTRY     *quantized_list;
-
-  MOTION_VECTOR     MVector;
-  ogg_uint32_t      TempBitCount;
-  ogg_int16_t      *DCT_codes; /* Buffer that stores the result of
-                                  Forward DCT */
-  ogg_int16_t      *DCTDataBuffer; /* Input data buffer for Forward DCT */
-
-  /* Motion compensation related variables */
-  ogg_uint32_t      MvMaxExtent;
-
-  double            QTargetModifier[Q_TABLE_SIZE];
-
-  /* instances (used for reconstructing buffers and to hold tokens etc.) */
-  PP_INSTANCE       pp;   /* preprocessor */
-  PB_INSTANCE       pb;   /* playback */
-
-  /* ogg bitpacker for use in packet coding, other API state */
-  oggpack_buffer   *oggbuffer;
-  int               readyflag;
-  int               packetflag;
-  int               doneflag;
-
-  DspFunctions   dsp;  /* Selected functions for this platform */
-
-} CP_INSTANCE;
-
-#define clamp255(x) ((unsigned char)((((x)<0)-1) & ((x) | -((x)>255))))
-
-extern void ConfigurePP( PP_INSTANCE *ppi, int Level ) ;
-extern ogg_uint32_t YUVAnalyseFrame( PP_INSTANCE *ppi,
-                                     ogg_uint32_t * KFIndicator );
-
-extern void ClearPPInstance(PP_INSTANCE *ppi);
-extern void InitPPInstance(PP_INSTANCE *ppi, DspFunctions *funcs);
-extern void InitPBInstance(PB_INSTANCE *pbi);
-extern void ClearPBInstance(PB_INSTANCE *pbi);
-
-extern void IDct1( Q_LIST_ENTRY * InputData,
-                   ogg_int16_t *QuantMatrix,
-                   ogg_int16_t * OutputData );
-
-extern void ReconIntra( PB_INSTANCE *pbi, unsigned char * ReconPtr,
-                        ogg_int16_t * ChangePtr, ogg_uint32_t LineStep );
-
-extern void ReconInter( PB_INSTANCE *pbi, unsigned char * ReconPtr,
-                        unsigned char * RefPtr, ogg_int16_t * ChangePtr,
-                        ogg_uint32_t LineStep ) ;
-
-extern void ReconInterHalfPixel2( PB_INSTANCE *pbi, unsigned char * ReconPtr,
-                                  unsigned char * RefPtr1,
-                                  unsigned char * RefPtr2,
-                                  ogg_int16_t * ChangePtr,
-                                  ogg_uint32_t LineStep ) ;
-
-extern void SetupLoopFilter(PB_INSTANCE *pbi);
-extern void CopyBlock(unsigned char *src,
-                      unsigned char *dest,
-                      unsigned int srcstride);
-extern void LoopFilter(PB_INSTANCE *pbi);
-extern void ReconRefFrames (PB_INSTANCE *pbi);
-extern void ExpandToken( Q_LIST_ENTRY * ExpandedBlock,
-                         unsigned char * CoeffIndex, ogg_uint32_t Token,
-                         ogg_int32_t ExtraBits );
-extern void ClearDownQFragData(PB_INSTANCE *pbi);
-
-extern void select_quantiser (PB_INSTANCE *pbi, int type);
-
-extern void quantize( PB_INSTANCE *pbi,
-                      ogg_int16_t * DCT_block,
-                      Q_LIST_ENTRY * quantized_list);
-extern void UpdateQ( PB_INSTANCE *pbi, int NewQIndex );
-extern void UpdateQC( CP_INSTANCE *cpi, ogg_uint32_t NewQ );
-extern void fdct_short ( ogg_int16_t * InputData, ogg_int16_t * OutputData );
-extern ogg_uint32_t DPCMTokenizeBlock (CP_INSTANCE *cpi,
-                                       ogg_int32_t FragIndex);
-extern void TransformQuantizeBlock (CP_INSTANCE *cpi, ogg_int32_t FragIndex,
-                                    ogg_uint32_t PixelsPerLine ) ;
-extern void ClearFragmentInfo(PB_INSTANCE * pbi);
-extern void InitFragmentInfo(PB_INSTANCE * pbi);
-extern void ClearFrameInfo(PB_INSTANCE * pbi);
-extern void InitFrameInfo(PB_INSTANCE * pbi, unsigned int FrameSize);
-extern void InitializeFragCoordinates(PB_INSTANCE *pbi);
-extern void InitFrameDetails(PB_INSTANCE *pbi);
-extern void WriteQTables(PB_INSTANCE *pbi,oggpack_buffer *opb);
-extern void InitQTables( PB_INSTANCE *pbi );
-extern void quant_tables_init( PB_INSTANCE *pbi, const th_quant_info *qinfo);
-extern void InitHuffmanSet( PB_INSTANCE *pbi );
-extern void ClearHuffmanSet( PB_INSTANCE *pbi );
-extern int  ReadHuffmanTrees(codec_setup_info *ci, oggpack_buffer *opb);
-extern void WriteHuffmanTrees(HUFF_ENTRY *HuffRoot[NUM_HUFF_TABLES],
-                              oggpack_buffer *opb);
-extern void InitHuffmanTrees(PB_INSTANCE *pbi, const codec_setup_info *ci);
-extern void ClearHuffmanTrees(HUFF_ENTRY *HuffRoot[NUM_HUFF_TABLES]);
-extern int  ReadFilterTables(codec_setup_info *ci, oggpack_buffer *opb);
-extern void QuadDecodeDisplayFragments ( PB_INSTANCE *pbi );
-extern void PackAndWriteDFArray( CP_INSTANCE *cpi );
-extern void UpdateFragQIndex(PB_INSTANCE *pbi);
-extern void PostProcess(PB_INSTANCE *pbi);
-extern void InitMotionCompensation ( CP_INSTANCE *cpi );
-extern ogg_uint32_t GetMBIntraError (CP_INSTANCE *cpi, ogg_uint32_t FragIndex,
-                                     ogg_uint32_t PixelsPerLine ) ;
-extern ogg_uint32_t GetMBInterError (CP_INSTANCE *cpi,
-                                     unsigned char * SrcPtr,
-                                     unsigned char * RefPtr,
-                                     ogg_uint32_t FragIndex,
-                                     ogg_int32_t LastXMV,
-                                     ogg_int32_t LastYMV,
-                                     ogg_uint32_t PixelsPerLine ) ;
-extern void WriteFrameHeader( CP_INSTANCE *cpi) ;
-extern ogg_uint32_t GetMBMVInterError (CP_INSTANCE *cpi,
-                                       unsigned char * RefFramePtr,
-                                       ogg_uint32_t FragIndex,
-                                       ogg_uint32_t PixelsPerLine,
-                                       ogg_int32_t *MVPixelOffset,
-                                       MOTION_VECTOR *MV );
-extern ogg_uint32_t GetMBMVExhaustiveSearch (CP_INSTANCE *cpi,
-                                             unsigned char * RefFramePtr,
-                                             ogg_uint32_t FragIndex,
-                                             ogg_uint32_t PixelsPerLine,
-                                             MOTION_VECTOR *MV );
-extern ogg_uint32_t GetFOURMVExhaustiveSearch (CP_INSTANCE *cpi,
-                                               unsigned char * RefFramePtr,
-                                               ogg_uint32_t FragIndex,
-                                               ogg_uint32_t PixelsPerLine,
-                                               MOTION_VECTOR *MV ) ;
-extern ogg_uint32_t EncodeData(CP_INSTANCE *cpi);
-extern ogg_uint32_t PickIntra( CP_INSTANCE *cpi,
-                               ogg_uint32_t SBRows,
-                               ogg_uint32_t SBCols);
-extern ogg_uint32_t PickModes(CP_INSTANCE *cpi,
-                              ogg_uint32_t SBRows,
-                              ogg_uint32_t SBCols,
-                              ogg_uint32_t PixelsPerLine,
-                              ogg_uint32_t *InterError,
-                              ogg_uint32_t *IntraError);
-
-extern CODING_MODE FrArrayUnpackMode(PB_INSTANCE *pbi);
-extern void CreateBlockMapping ( ogg_int32_t  (*BlockMap)[4][4],
-                                 ogg_uint32_t YSuperBlocks,
-                                 ogg_uint32_t UVSuperBlocks,
-                                 ogg_uint32_t HFrags, ogg_uint32_t VFrags );
-extern void UpRegulateDataStream (CP_INSTANCE *cpi, ogg_uint32_t RegulationQ,
-                                  ogg_int32_t RecoveryBlocks ) ;
-extern void RegulateQ( CP_INSTANCE *cpi, ogg_int32_t UpdateScore );
-extern void CopyBackExtraFrags(CP_INSTANCE *cpi);
-
-extern void UpdateUMVBorder( PB_INSTANCE *pbi,
-                             unsigned char * DestReconPtr );
-extern void PInitFrameInfo(PP_INSTANCE * ppi);
-
-extern double GetEstimatedBpb( CP_INSTANCE *cpi, ogg_uint32_t TargetQ );
-extern void ClearTmpBuffers(PB_INSTANCE * pbi);
-extern void InitTmpBuffers(PB_INSTANCE * pbi);
-extern void ScanYUVInit( PP_INSTANCE *  ppi,
-                         SCAN_CONFIG_DATA * ScanConfigPtr);
-
-#endif /* ENCODER_INTERNAL_H */

+ 0 - 268
Engine/lib/libtheora/lib/enc/dct.c

@@ -1,268 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: dct.c 13884 2007-09-22 08:38:10Z giles $
-
- ********************************************************************/
-
-#include "codec_internal.h"
-#include "dsp.h"
-#include "../cpu.h"
-
-/* Cosine constants in 16.16 fixed point: xCkS(8-k) = round(65536*cos(k*pi/16)).
-   Nothing in this file writes them, so they are const-qualified. */
-static const ogg_int32_t xC1S7 = 64277;
-static const ogg_int32_t xC2S6 = 60547;
-static const ogg_int32_t xC3S5 = 54491;
-static const ogg_int32_t xC4S4 = 46341;
-static const ogg_int32_t xC5S3 = 36410;
-static const ogg_int32_t xC6S2 = 25080;
-static const ogg_int32_t xC7S1 = 12785;
-
-/* All ones (-1) when X is negative, 0 otherwise.  Expressed as a comparison
-   so the result does not depend on how 0x80000000 converts to a signed type
-   or on bit 31 being the sign bit of X. */
-#define SIGNBITDUPPED(X) (-(ogg_int32_t)((X) < 0))
-/* Adds 0xffff to a negative X and nothing to a non-negative one, so that a
-   following >>16 discards the fraction toward zero for either sign.
-   X is evaluated twice: pass a plain variable. */
-#define DOROUND(X) ( (SIGNBITDUPPED(X) & (0xffff)) + (X) )
-
-/* Multiplies VALUE by the 16.16 fixed-point constant COEFF and drops the
-   fraction: the 32-bit product is biased with DOROUND and shifted right by
-   16, which is the sequence every multiply of the transform goes through. */
-static ogg_int32_t fdct_scale__c ( ogg_int32_t coeff, ogg_int32_t value ){
-  ogg_int32_t product = coeff*value;
-  product = DOROUND(product);
-  return product>>16;
-}
-
-/* One 8-point pass of the forward DCT.  Reads the eight samples in SRC and
-   stores the eight coefficients, in frequency order, in DST.  SRC and DST
-   must not overlap. */
-static void fdct_1d__c ( const ogg_int32_t *src, ogg_int32_t *dst ){
-  /* Sums and differences of the sample pairs (0,7) (1,2) (3,4) (5,6). */
-  ogg_int32_t sum07 = src[0] + src[7];
-  ogg_int32_t sum12 = src[1] + src[2];
-  ogg_int32_t sum34 = src[3] + src[4];
-  ogg_int32_t sum56 = src[5] + src[6];
-
-  ogg_int32_t dif07 = src[0] - src[7];
-  ogg_int32_t dif12 = src[1] - src[2];
-  ogg_int32_t dif34 = src[3] - src[4];
-  ogg_int32_t dif56 = src[5] - src[6];
-
-  ogg_int32_t sum0734 = sum07 + sum34;
-  ogg_int32_t sum1256 = sum12 + sum56;
-
-  /* Products shared by the odd outputs:
-     c4s4 * (s12 - s56) and c4s4 * (d12 + d56). */
-  ogg_int32_t shared1 = fdct_scale__c(xC4S4, sum12 - sum56);
-  ogg_int32_t shared2 = fdct_scale__c(xC4S4, dif12 + dif56);
-
-  ogg_int32_t rot_x;
-  ogg_int32_t rot_y;
-
-  /* Outputs 0 and 4. */
-  dst[0] = fdct_scale__c(xC4S4, sum0734 + sum1256);
-  dst[4] = fdct_scale__c(xC4S4, sum0734 - sum1256);
-
-  /* Rotation producing outputs 2 and 6. */
-  rot_x = dif12 - dif56;
-  rot_y = sum07 - sum34;
-  dst[2] = fdct_scale__c(xC6S2, rot_x) + fdct_scale__c(xC2S6, rot_y);
-  dst[6] = fdct_scale__c(xC6S2, rot_y) - fdct_scale__c(xC2S6, rot_x);
-
-  /* Rotation producing outputs 1 and 7. */
-  rot_x = shared1 + dif07;
-  rot_y = -( dif34 + shared2 );
-  dst[1] = fdct_scale__c(xC1S7, rot_x) - fdct_scale__c(xC7S1, rot_y);
-  dst[7] = fdct_scale__c(xC7S1, rot_x) + fdct_scale__c(xC1S7, rot_y);
-
-  /* Rotation producing outputs 3 and 5. */
-  rot_x = dif07 - shared1;
-  rot_y = dif34 - shared2;
-  dst[3] = fdct_scale__c(xC3S5, rot_x) - fdct_scale__c(xC5S3, rot_y);
-  dst[5] = fdct_scale__c(xC5S3, rot_x) + fdct_scale__c(xC3S5, rot_y);
-}
-
-/* Forward 8x8 DCT, portable C version.
-   InputData and OutputData each address 64 values stored row by row (eight
-   per row).  The rows are transformed first into a 32-bit intermediate
-   block; the columns of that block are then transformed and narrowed to 16
-   bits as they are stored in OutputData. */
-static void fdct_short__c ( ogg_int16_t * InputData, ogg_int16_t * OutputData ){
-  ogg_int32_t  InterData[64];   /* row-transformed block */
-  ogg_int32_t  lane[8];         /* samples of the current row or column */
-  ogg_int32_t  coeffs[8];       /* transformed column before narrowing */
-  int          row;
-  int          col;
-  int          k;
-
-  /* Pass 1: rows of the input block. */
-  for (row = 0; row < 8; row++){
-    for (k = 0; k < 8; k++){
-      lane[k] = InputData[row*8 + k];
-    }
-    fdct_1d__c(lane, &InterData[row*8]);
-  }
-
-  /* Pass 2: columns of the intermediate block. */
-  for (col = 0; col < 8; col++){
-    for (k = 0; k < 8; k++){
-      lane[k] = InterData[k*8 + col];
-    }
-    fdct_1d__c(lane, coeffs);
-    for (k = 0; k < 8; k++){
-      OutputData[k*8 + col] = (ogg_int16_t) coeffs[k];
-    }
-  }
-}
-
-/* Registers the DCT routines in FUNCS: the portable C forward DCT goes in
-   first, then the decode-side and IDCT initialisers run, and finally, in
-   USE_ASM builds where CPU_FLAGS reports MMX, the MMX forward-DCT
-   initialiser gets the last word. */
-void dsp_dct_init (DspFunctions *funcs, ogg_uint32_t cpu_flags)
-{
-  /* Portable default; the initialisers below run after it. */
-  funcs->fdct_short = fdct_short__c;
-
-  dsp_dct_decode_init(funcs, cpu_flags);
-  dsp_idct_init(funcs, cpu_flags);
-
-#ifdef USE_ASM
-  if ((cpu_flags & OC_CPU_X86_MMX) != 0) {
-    dsp_mmx_fdct_init(funcs);
-  }
-#endif
-}
-

+ 0 - 941
Engine/lib/libtheora/lib/enc/dct_decode.c

@@ -1,941 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: dct_decode.c 15400 2008-10-15 12:10:58Z tterribe $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include <string.h>
-#include "codec_internal.h"
-#include "quant_lookup.h"
-
-
-#define GOLDEN_FRAME_THRESH_Q   50
-#define PUR 8
-#define PU 4
-#define PUL 2
-#define PL 1
-#define HIGHBITDUPPED(X) (((signed short) X)  >> 15)
-/* NOTE(review): PUR/PU/PUL/PL are four distinct single-bit values (8,4,2,1),
-   presumably OR-ed into a neighbour mask -- confirm against their users.
-   HIGHBITDUPPED casts X without parenthesising it, so with a compound
-   argument the cast binds to the first operand only; it also relies on >>
-   of a negative short being an arithmetic shift. */
-/* Indexed by coding mode: non-zero for the modes that ExpandBlock()
-   reconstructs from a motion-compensated reference. */
-static const int ModeUsesMC[MAX_MODES] = { 0, 0, 1, 1, 1, 0, 1, 1 };
-
-/* Fills the 256-entry bounding-value table for the filter limit FLimit.
-   BoundingValuePtr must point at 256 writable entries.  Entry 127+d holds
-   the bounded value for a raw value d: d itself while |d| < FLimit, then a
-   linear ramp back down to 0 at |d| == 2*FLimit, and 0 everywhere else.
-   Once FLimit exceeds 64 part of the ramp falls outside the table; those
-   entries are skipped rather than written out of bounds.  For FLimit <= 64
-   the table contents are unchanged from the unguarded version. */
-static void SetupBoundingValueArray_Generic(ogg_int16_t *BoundingValuePtr,
-                                            ogg_int32_t FLimit){
-  enum { TABLE_SIZE = 256, CENTRE = 127 };
-  ogg_int32_t i;
-
-  /* Set up the bounding value array. */
-  memset ( BoundingValuePtr, 0, (TABLE_SIZE*sizeof(*BoundingValuePtr)) );
-
-  /* Nothing at a distance of TABLE_SIZE or more from the centre can be in
-     range, so the loop is capped there; this also keeps the comparisons
-     below free of integer overflow for any FLimit. */
-  for ( i = 0; i < FLimit && i < TABLE_SIZE; i++ ){
-    /* Ramp, negative side: index 127-i-FLimit, valid when it is >= 0. */
-    if ( FLimit <= CENTRE - i )
-      BoundingValuePtr[CENTRE-i-FLimit] = (-FLimit+i);
-    /* Identity part, negative side. */
-    if ( i <= CENTRE )
-      BoundingValuePtr[CENTRE-i] = -i;
-    /* Identity part, positive side. */
-    if ( i < TABLE_SIZE - CENTRE )
-      BoundingValuePtr[CENTRE+i] = i;
-    /* Ramp, positive side: index 127+i+FLimit, valid when it is <= 255. */
-    if ( FLimit <= TABLE_SIZE - 1 - CENTRE - i )
-      BoundingValuePtr[CENTRE+i+FLimit] = FLimit-i;
-  }
-}
-
-/* Reconstructs fragment FragmentNumber of a key frame.  The fragment's plane
-   decides the line stride and which intra dequantiser table is installed in
-   pbi->dequant_coeffs; the fragment's FragCoefEOB value picks the inverse
-   DCT variant; the result is written as an intra block into ThisFrameRecon. */
-static void ExpandKFBlock ( PB_INSTANCE *pbi, ogg_int32_t FragmentNumber ){
-  ogg_uint32_t stride;
-  ogg_int32_t  pixel_offset;
-
-  /* Both chroma planes share a stride; the luma branch overrides it. */
-  stride = pbi->UVStride;
-  if ( FragmentNumber<(ogg_int32_t)pbi->YPlaneFragments ){
-    stride = pbi->YStride;
-    pbi->dequant_coeffs = pbi->dequant_Y_coeffs;
-  }else if ( FragmentNumber<(ogg_int32_t)(pbi->YPlaneFragments + pbi->UVPlaneFragments) ){
-    pbi->dequant_coeffs = pbi->dequant_U_coeffs;
-  }else{
-    pbi->dequant_coeffs = pbi->dequant_V_coeffs;
-  }
-
-  /* Point at this fragment's quantised coefficients. */
-  pbi->quantized_list = &pbi->QFragData[FragmentNumber][0];
-
-  /* Dequantise and inverse transform with the variant chosen by the
-     fragment's FragCoefEOB value. */
-  switch(pbi->FragCoefEOB[FragmentNumber]){
-  case 0:
-  case 1:
-    IDct1( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
-    break;
-  case 2:
-  case 3:
-    dsp_IDct3(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
-    break;
-  case 4:
-  case 5:
-  case 6:
-  case 7:
-  case 8:
-  case 9:
-  case 10:
-    dsp_IDct10(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
-    break;
-  default:
-    dsp_IDctSlow(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
-  }
-
-  /* Offset of the fragment's first pixel in a reconstruction buffer. */
-  pixel_offset = pbi->recon_pixel_index_table[FragmentNumber];
-
-  /* Write the block out as intra data. */
-  dsp_recon_intra8x8 (pbi->dsp, (unsigned char *)(&pbi->ThisFrameRecon[pixel_offset]),
-              (ogg_int16_t *)pbi->ReconDataBuffer, stride);
-}
-
-/* Reconstructs fragment FragmentNumber into ThisFrameRecon.
-   The coding mode (always intra in a key frame, otherwise taken from
-   FragCodingMethod) and the fragment's plane select the dequantiser table,
-   the line stride and the motion-vector precision.  After the inverse DCT
-   the residual in ReconDataBuffer is combined with the reference the mode
-   calls for: the last frame, the golden frame, a motion-compensated block
-   of either, or nothing at all for intra. */
-static void ExpandBlock ( PB_INSTANCE *pbi, ogg_int32_t FragmentNumber){
-  unsigned char *ref_ptr;            /* First reference block */
-  unsigned char *ref_ptr2;           /* Second reference block, used for
-                                        1/2 pixel MC */
-
-  ogg_uint32_t   stride;             /* Pixels per line */
-  ogg_int32_t    pixel_offset;       /* Block offset in a reconstruction
-                                        buffer */
-  ogg_int32_t    second_ref_offset;  /* From first to second reference */
-  ogg_int32_t    mv_offset;          /* Whole-pixel part of the vector */
-  ogg_int32_t    mv_shift;           /* Shift that removes the fraction */
-  ogg_int32_t    mv_frac_mask;       /* Mask that isolates the fraction */
-
-  /* Key frames are intra throughout; other frames carry a mode per
-     fragment. */
-  if ( pbi->FrameType != KEY_FRAME ){
-    pbi->CodingMode = pbi->FragCodingMethod[FragmentNumber];
-  }else{
-    pbi->CodingMode = CODE_INTRA;
-  }
-
-  /* Per-plane stride, vector precision and dequantiser table. */
-  if ( FragmentNumber<(ogg_int32_t)pbi->YPlaneFragments ) {
-    stride = pbi->YStride;
-    mv_shift = 1;
-    mv_frac_mask = 0x00000001;
-
-    if ( pbi->CodingMode == CODE_INTRA ){
-      pbi->dequant_coeffs = pbi->dequant_Y_coeffs;
-    }else{
-      pbi->dequant_coeffs = pbi->dequant_InterY_coeffs;
-    }
-  }else{
-    stride = pbi->UVStride;
-    mv_shift = 2;
-    mv_frac_mask = 0x00000003;
-
-    if ( FragmentNumber <
-              (ogg_int32_t)(pbi->YPlaneFragments + pbi->UVPlaneFragments) ){
-      /* U plane */
-      if ( pbi->CodingMode == CODE_INTRA ){
-        pbi->dequant_coeffs = pbi->dequant_U_coeffs;
-      }else{
-        pbi->dequant_coeffs = pbi->dequant_InterU_coeffs;
-      }
-    }else{
-      /* V plane */
-      if ( pbi->CodingMode == CODE_INTRA ){
-        pbi->dequant_coeffs = pbi->dequant_V_coeffs;
-      }else{
-        pbi->dequant_coeffs = pbi->dequant_InterV_coeffs;
-      }
-    }
-  }
-
-  /* Point at this fragment's quantised coefficients. */
-  pbi->quantized_list = &pbi->QFragData[FragmentNumber][0];
-
-  /* Dequantise and inverse transform with the variant chosen by the
-     fragment's FragCoefEOB value. */
-  switch(pbi->FragCoefEOB[FragmentNumber]){
-  case 0:
-  case 1:
-    IDct1( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
-    break;
-  case 2:
-  case 3:
-    dsp_IDct3(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
-    break;
-  case 4:
-  case 5:
-  case 6:
-  case 7:
-  case 8:
-  case 9:
-  case 10:
-    dsp_IDct10(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
-    break;
-  default:
-    dsp_IDctSlow(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
-  }
-
-  /* Offset of the fragment's first pixel in a reconstruction buffer. */
-  pixel_offset = pbi->recon_pixel_index_table[FragmentNumber];
-
-  /* Inter, zero vector: predict from the co-located block of the last
-     frame reconstruction. */
-  if ( pbi->CodingMode == CODE_INTER_NO_MV ){
-    dsp_recon_inter8x8 (pbi->dsp, &pbi->ThisFrameRecon[pixel_offset],
-                &pbi->LastFrameRecon[pixel_offset],
-                  pbi->ReconDataBuffer, stride);
-    return;
-  }
-
-  /* Modes without motion compensation. */
-  if ( !ModeUsesMC[pbi->CodingMode] ){
-    if ( pbi->CodingMode == CODE_USING_GOLDEN ){
-      /* Predict from the co-located block of the golden frame. */
-      dsp_recon_inter8x8 (pbi->dsp, &pbi->ThisFrameRecon[pixel_offset],
-                  &pbi->GoldenFrame[ pixel_offset ],
-                    pbi->ReconDataBuffer, stride);
-    }else{
-      /* Plain intra block. */
-      dsp_recon_intra8x8 (pbi->dsp, &pbi->ThisFrameRecon[pixel_offset],
-                pbi->ReconDataBuffer, stride);
-    }
-    return;
-  }
-
-  /* Motion-compensated modes: fetch the fragment's vector. */
-  pbi->MVector.x = pbi->FragMVect[FragmentNumber].x;
-  pbi->MVector.y = pbi->FragMVect[FragmentNumber].y;
-
-  /* Split each component into a whole-pixel offset and, when any fraction
-     bits are set, a one-pixel (or one-line) step toward the second
-     reference.  In the U and V planes the vector has 1/4 pixel precision
-     and every non-zero fraction is treated as 1/2
-     ( 0->0, 1/4->1/2, 1/2->1/2, 3/4->1/2 ). */
-  mv_offset = 0;
-  second_ref_offset = 0;
-  if ( pbi->MVector.x > 0 ){
-    mv_offset = pbi->MVector.x >> mv_shift;
-    if ( pbi->MVector.x & mv_frac_mask ){
-      second_ref_offset += 1;
-    }
-  } else if ( pbi->MVector.x < 0 ) {
-    mv_offset -= (-pbi->MVector.x) >> mv_shift;
-    if ( (-pbi->MVector.x) & mv_frac_mask ){
-      second_ref_offset -= 1;
-    }
-  }
-
-  if ( pbi->MVector.y > 0 ){
-    mv_offset += (pbi->MVector.y >>  mv_shift) * stride;
-    if ( pbi->MVector.y & mv_frac_mask ){
-      second_ref_offset += stride;
-    }
-  } else if ( pbi->MVector.y < 0 ){
-    mv_offset -= ((-pbi->MVector.y) >> mv_shift) * stride;
-    if ( (-pbi->MVector.y) & mv_frac_mask ){
-      second_ref_offset -= stride;
-    }
-  }
-
-  /* First reference: golden frame for CODE_GOLDEN_MV, last frame
-     otherwise. */
-  if ( pbi->CodingMode==CODE_GOLDEN_MV ) {
-    ref_ptr = &pbi->GoldenFrame[pixel_offset] + mv_offset;
-  }else{
-    ref_ptr = &pbi->LastFrameRecon[pixel_offset] + mv_offset;
-  }
-
-  /* Second reference, one fractional step away from the first. */
-  ref_ptr2 = ref_ptr + second_ref_offset;
-
-  if ( ref_ptr == ref_ptr2 ) {
-    /* Both references coincide: whole-pixel prediction. */
-    dsp_recon_inter8x8 (pbi->dsp,
-        &pbi->ThisFrameRecon[pixel_offset],
-                ref_ptr, pbi->ReconDataBuffer,
-                stride);
-  }else{
-    /* Fractional prediction from the two references; only two pixels are
-       used per output pixel, even on the diagonal. */
-    dsp_recon_inter8x8_half(pbi->dsp, &pbi->ThisFrameRecon[pixel_offset],
-                          ref_ptr, ref_ptr2,
-                          pbi->ReconDataBuffer, stride);
-  }
-}
-
-/* Fills the border rows above and below one plane of the reconstruction
-   buffer DestReconPtr.  PlaneFragOffset is the index of the plane's first
-   fragment; 0 selects the Y geometry, anything else the U/V geometry.
-   Each border row receives a copy of one full stride of the plane's first
-   (top border) or last (bottom border) pixel row, starting at the left
-   border column. */
-static void UpdateUMV_HBorders( PB_INSTANCE *pbi,
-                                unsigned char * DestReconPtr,
-                                ogg_uint32_t  PlaneFragOffset ) {
-  ogg_uint32_t  rows_left;
-  ogg_uint32_t  pixel_index;
-
-  ogg_uint32_t  stride;
-  ogg_uint32_t  last_line_step;   /* first to last line of a fragment */
-  ogg_uint32_t  frags_in_plane;
-  ogg_uint32_t  frags_per_line;
-  ogg_uint32_t  border;
-
-  unsigned char   *top_src;
-  unsigned char   *bottom_src;
-  unsigned char   *top_dst;
-  unsigned char   *bottom_dst;
-
-  /* Plane geometry. */
-  if ( PlaneFragOffset != 0 ) {
-    /* U or V plane. */
-    last_line_step = (pbi->UVStride *
-                      (VFRAGPIXELS - 1));
-    stride = pbi->UVStride;
-    border = UMV_BORDER / 2;
-    frags_in_plane = pbi->UVPlaneFragments;
-    frags_per_line = pbi->HFragments / 2;
-  }else{
-    /* Y plane. */
-    last_line_step = (pbi->YStride *
-                      (VFRAGPIXELS - 1));
-    stride = pbi->YStride;
-    border = UMV_BORDER;
-    frags_in_plane = pbi->YPlaneFragments;
-    frags_per_line = pbi->HFragments;
-  }
-
-  /* Top: the source is the first pixel row, taken from the left border
-     column; the destination starts `border` rows above it. */
-  pixel_index = pbi->recon_pixel_index_table[PlaneFragOffset];
-  top_src = &DestReconPtr[ pixel_index - border ];
-  top_dst = top_src - (border * stride);
-
-  /* Bottom: the source is the last line of the first fragment in the last
-     fragment row; the destination starts on the line below it. */
-  pixel_index = pbi->recon_pixel_index_table[PlaneFragOffset +
-                                            frags_in_plane - frags_per_line] +
-    last_line_step;
-  bottom_src = &DestReconPtr[ pixel_index - border];
-  bottom_dst = bottom_src + stride;
-
-  /* Replicate the two source rows into every border row. */
-  for ( rows_left = border; rows_left > 0; rows_left-- ) {
-    memcpy( top_dst, top_src, stride );
-    memcpy( bottom_dst, bottom_src, stride );
-    top_dst += stride;
-    bottom_dst += stride;
-  }
-}
-
-/* Fills the border columns to the left and right of one plane of the
-   reconstruction buffer DestReconPtr.  PlaneFragOffset is the index of the
-   plane's first fragment; 0 selects the Y geometry, anything else the U/V
-   geometry.  On every picture row the left border is filled with the row's
-   first pixel and the right border with its last pixel. */
-static void UpdateUMV_VBorders( PB_INSTANCE *pbi,
-                                unsigned char * DestReconPtr,
-                                ogg_uint32_t  PlaneFragOffset ){
-  ogg_uint32_t  rows_left;
-  ogg_uint32_t  pixel_index;
-
-  ogg_uint32_t  stride;
-  ogg_uint32_t  frags_per_line;
-  ogg_uint32_t  border;
-  ogg_uint32_t  plane_rows;
-
-  unsigned char   *left_src;
-  unsigned char   *right_src;
-  unsigned char   *left_dst;
-  unsigned char   *right_dst;
-
-  /* Plane geometry. */
-  if ( PlaneFragOffset != 0 ) {
-    /* U or V plane. */
-    stride = pbi->UVStride;
-    border = UMV_BORDER / 2;
-    frags_per_line = pbi->HFragments / 2;
-    plane_rows = pbi->info.height / 2;
-  }else{
-    /* Y plane. */
-    stride = pbi->YStride;
-    border = UMV_BORDER;
-    frags_per_line = pbi->HFragments;
-    plane_rows = pbi->info.height;
-  }
-
-  /* Left edge: first pixel of the first row, and the border start to its
-     left. */
-  pixel_index = pbi->recon_pixel_index_table[PlaneFragOffset];
-  left_src = &DestReconPtr[ pixel_index ];
-  left_dst = &DestReconPtr[ pixel_index - border ];
-
-  /* Right edge: last pixel of the first row, and the pixel just after it. */
-  pixel_index = pbi->recon_pixel_index_table[PlaneFragOffset +
-                                            frags_per_line - 1] +
-    (HFRAGPIXELS - 1);
-  right_src = &DestReconPtr[ pixel_index ];
-  right_dst = &DestReconPtr[ pixel_index + 1 ];
-
-  /* Walk down the plane, smearing each row's edge pixels into the left and
-     right borders. */
-  for ( rows_left = plane_rows; rows_left > 0; rows_left-- ) {
-    memset( left_dst, left_src[0], border );
-    memset( right_dst, right_src[0], border );
-    left_src += stride;
-    right_src += stride;
-    left_dst += stride;
-    right_dst += stride;
-  }
-}
-
-/* Rebuilds the borders of all three planes of the reconstruction buffer
-   DestReconPtr.  For each plane, in the order Y, U, V, the left/right
-   borders are filled first and the top/bottom borders second. */
-void UpdateUMVBorder( PB_INSTANCE *pbi,
-                      unsigned char * DestReconPtr ) {
-  ogg_uint32_t  first_frag[3];   /* index of each plane's first fragment */
-  int           plane;
-
-  first_frag[0] = 0;
-  first_frag[1] = pbi->YPlaneFragments;
-  first_frag[2] = pbi->YPlaneFragments + pbi->UVPlaneFragments;
-
-  for ( plane = 0; plane < 3; plane++ ) {
-    UpdateUMV_VBorders( pbi, DestReconPtr, first_frag[plane] );
-    UpdateUMV_HBorders( pbi, DestReconPtr, first_frag[plane] );
-  }
-}
-
-/* Copies from SrcReconPtr to DestReconPtr every 8x8 fragment whose
-   display_fragments flag is set.  Both buffers are addressed through
-   recon_pixel_index_table; fragments below YPlaneFragments use the Y
-   stride, the remaining ones up to UnitFragments the UV stride. */
-static void CopyRecon( PB_INSTANCE *pbi, unsigned char * DestReconPtr,
-                unsigned char * SrcReconPtr ) {
-  ogg_uint32_t  frag;
-  ogg_uint32_t  stride;        /* Pixels per line */
-  ogg_uint32_t  offset;        /* Fragment offset in both buffers */
-
-  /* Y plane */
-  stride = pbi->YStride;
-  for ( frag = 0; frag < pbi->YPlaneFragments; frag++ ) {
-    if ( !pbi->display_fragments[frag] )
-      continue;
-
-    offset = pbi->recon_pixel_index_table[frag];
-    dsp_copy8x8 (pbi->dsp, &SrcReconPtr[ offset ], &DestReconPtr[ offset ], stride);
-  }
-
-  /* U and V planes */
-  stride = pbi->UVStride;
-  for ( frag = pbi->YPlaneFragments; frag < pbi->UnitFragments; frag++ ) {
-    if ( !pbi->display_fragments[frag] )
-      continue;
-
-    offset = pbi->recon_pixel_index_table[frag];
-    dsp_copy8x8 (pbi->dsp, &SrcReconPtr[ offset ], &DestReconPtr[ offset ], stride);
-  }
-}
-
-/* Counterpart of CopyRecon: copies from SrcReconPtr to DestReconPtr every
-   8x8 fragment whose display_fragments flag is NOT set.  Both buffers are
-   addressed through recon_pixel_index_table; fragments below
-   YPlaneFragments use the Y stride, the remaining ones up to UnitFragments
-   the UV stride. */
-static void CopyNotRecon( PB_INSTANCE *pbi, unsigned char * DestReconPtr,
-                   unsigned char * SrcReconPtr ) {
-  ogg_uint32_t  frag;
-  ogg_uint32_t  stride;        /* Pixels per line */
-  ogg_uint32_t  offset;        /* Fragment offset in both buffers */
-
-  /* Y plane */
-  stride = pbi->YStride;
-  for ( frag = 0; frag < pbi->YPlaneFragments; frag++ ) {
-    if ( pbi->display_fragments[frag] )
-      continue;
-
-    offset = pbi->recon_pixel_index_table[frag];
-    dsp_copy8x8 (pbi->dsp, &SrcReconPtr[ offset ], &DestReconPtr[ offset ], stride);
-  }
-
-  /* U and V planes */
-  stride = pbi->UVStride;
-  for ( frag = pbi->YPlaneFragments; frag < pbi->UnitFragments; frag++ ) {
-    if ( pbi->display_fragments[frag] )
-      continue;
-
-    offset = pbi->recon_pixel_index_table[frag];
-    dsp_copy8x8 (pbi->dsp, &SrcReconPtr[ offset ], &DestReconPtr[ offset ], stride);
-  }
-}
-
-void ExpandToken( Q_LIST_ENTRY * ExpandedBlock,
-                  unsigned char * CoeffIndex, ogg_uint32_t Token,
-                  ogg_int32_t ExtraBits ){
-  /* Is the token is a combination run and value token. */
-  if ( Token >= DCT_RUN_CATEGORY1 ){
-    /* Expand the token and additional bits to a zero run length and
-       data value.  */
-    if ( Token < DCT_RUN_CATEGORY2 ) {
-      /* Decoding method depends on token */
-      if ( Token < DCT_RUN_CATEGORY1B ) {
-        /* Step on by the zero run length */
-        *CoeffIndex += (unsigned char)((Token - DCT_RUN_CATEGORY1) + 1);
-
-        /* The extra bit determines the sign. */
-        if ( ExtraBits & 0x01 )
-          ExpandedBlock[*CoeffIndex] = -1;
-        else
-          ExpandedBlock[*CoeffIndex] = 1;
-      } else if ( Token == DCT_RUN_CATEGORY1B ) {
-        /* Bits 0-1 determines the zero run length */
-        *CoeffIndex += (6 + (ExtraBits & 0x03));
-
-        /* Bit 2 determines the sign */
-        if ( ExtraBits & 0x04 )
-          ExpandedBlock[*CoeffIndex] = -1;
-        else
-          ExpandedBlock[*CoeffIndex] = 1;
-      }else{
-        /* Bits 0-2 determines the zero run length */
-        *CoeffIndex += (10 + (ExtraBits & 0x07));
-
-        /* Bit 3 determines the sign */
-        if ( ExtraBits & 0x08 )
-          ExpandedBlock[*CoeffIndex] = -1;
-        else
-          ExpandedBlock[*CoeffIndex] = 1;
-      }
-    }else{
-      /* If token == DCT_RUN_CATEGORY2 we have a single 0 followed by
-         a value */
-      if ( Token == DCT_RUN_CATEGORY2 ){
-        /* Step on by the zero run length */
-        *CoeffIndex += 1;
-
-        /* Bit 1 determines sign, bit 0 the value */
-        if ( ExtraBits & 0x02 )
-          ExpandedBlock[*CoeffIndex] = -(2 + (ExtraBits & 0x01));
-        else
-          ExpandedBlock[*CoeffIndex] = 2 + (ExtraBits & 0x01);
-      }else{
-        /* else we have 2->3 zeros followed by a value */
-        /* Bit 0 determines the zero run length */
-        *CoeffIndex += 2 + (ExtraBits & 0x01);
-
-        /* Bit 2 determines the sign, bit 1 the value */
-        if ( ExtraBits & 0x04 )
-          ExpandedBlock[*CoeffIndex] = -(2 + ((ExtraBits & 0x02) >> 1));
-        else
-          ExpandedBlock[*CoeffIndex] = 2 + ((ExtraBits & 0x02) >> 1);
-      }
-    }
-
-    /* Step on over value */
-    *CoeffIndex += 1;
-
-  } else if ( Token == DCT_SHORT_ZRL_TOKEN ) {
-    /* Token is a ZRL token so step on by the appropriate number of zeros */
-    *CoeffIndex += ExtraBits + 1;
-  } else if ( Token == DCT_ZRL_TOKEN ) {
-    /* Token is a ZRL token so step on by the appropriate number of zeros */
-    *CoeffIndex += ExtraBits + 1;
-  } else if ( Token < LOW_VAL_TOKENS ) {
-    /* Token is a small single value token. */
-    switch ( Token ) {
-    case ONE_TOKEN:
-      ExpandedBlock[*CoeffIndex] = 1;
-      break;
-    case MINUS_ONE_TOKEN:
-      ExpandedBlock[*CoeffIndex] = -1;
-      break;
-    case TWO_TOKEN:
-      ExpandedBlock[*CoeffIndex] = 2;
-      break;
-    case MINUS_TWO_TOKEN:
-      ExpandedBlock[*CoeffIndex] = -2;
-      break;
-    }
-
-    /* Step on the coefficient index. */
-    *CoeffIndex += 1;
-  }else{
-    /* Token is a larger single value token */
-    /* Expand the token and additional bits to a data value. */
-    if ( Token < DCT_VAL_CATEGORY3 ) {
-      /* Offset from LOW_VAL_TOKENS determines value */
-      Token = Token - LOW_VAL_TOKENS;
-
-      /* Extra bit determines sign */
-      if ( ExtraBits )
-        ExpandedBlock[*CoeffIndex] =
-          -((Q_LIST_ENTRY)(Token + DCT_VAL_CAT2_MIN));
-      else
-        ExpandedBlock[*CoeffIndex] =
-          (Q_LIST_ENTRY)(Token + DCT_VAL_CAT2_MIN);
-    } else if ( Token == DCT_VAL_CATEGORY3 ) {
-      /* Bit 1 determines sign, Bit 0 the value */
-      if ( ExtraBits & 0x02 )
-        ExpandedBlock[*CoeffIndex] = -(DCT_VAL_CAT3_MIN + (ExtraBits & 0x01));
-      else
-        ExpandedBlock[*CoeffIndex] = DCT_VAL_CAT3_MIN + (ExtraBits & 0x01);
-    } else if ( Token == DCT_VAL_CATEGORY4 ) {
-      /* Bit 2 determines sign, Bit 0-1 the value */
-      if ( ExtraBits & 0x04 )
-        ExpandedBlock[*CoeffIndex] = -(DCT_VAL_CAT4_MIN + (ExtraBits & 0x03));
-      else
-        ExpandedBlock[*CoeffIndex] = DCT_VAL_CAT4_MIN + (ExtraBits & 0x03);
-    } else if ( Token == DCT_VAL_CATEGORY5 ) {
-      /* Bit 3 determines sign, Bit 0-2 the value */
-      if ( ExtraBits & 0x08 )
-        ExpandedBlock[*CoeffIndex] = -(DCT_VAL_CAT5_MIN + (ExtraBits & 0x07));
-      else
-        ExpandedBlock[*CoeffIndex] = DCT_VAL_CAT5_MIN + (ExtraBits & 0x07);
-    } else if ( Token == DCT_VAL_CATEGORY6 ) {
-      /* Bit 4 determines sign, Bit 0-3 the value */
-      if ( ExtraBits & 0x10 )
-        ExpandedBlock[*CoeffIndex] = -(DCT_VAL_CAT6_MIN + (ExtraBits & 0x0F));
-      else
-        ExpandedBlock[*CoeffIndex] = DCT_VAL_CAT6_MIN + (ExtraBits & 0x0F);
-    } else if ( Token == DCT_VAL_CATEGORY7 ) {
-      /* Bit 5 determines sign, Bit 0-4 the value */
-      if ( ExtraBits & 0x20 )
-        ExpandedBlock[*CoeffIndex] = -(DCT_VAL_CAT7_MIN + (ExtraBits & 0x1F));
-      else
-        ExpandedBlock[*CoeffIndex] = DCT_VAL_CAT7_MIN + (ExtraBits & 0x1F);
-    } else if ( Token == DCT_VAL_CATEGORY8 ) {
-      /* Bit 9 determines sign, Bit 0-8 the value */
-      if ( ExtraBits & 0x200 )
-        ExpandedBlock[*CoeffIndex] = -(DCT_VAL_CAT8_MIN + (ExtraBits & 0x1FF));
-      else
-        ExpandedBlock[*CoeffIndex] = DCT_VAL_CAT8_MIN + (ExtraBits & 0x1FF);
-    }
-
-    /* Step on the coefficient index. */
-    *CoeffIndex += 1;
-  }
-}
-
-void ClearDownQFragData(PB_INSTANCE *pbi){
-  ogg_int32_t       i;
-  Q_LIST_ENTRY *    QFragPtr;
-
-  for ( i = 0; i < pbi->CodedBlockIndex; i++ ) {
-    /* Get the linear index for the current fragment. */
-    QFragPtr = pbi->QFragData[pbi->CodedBlockList[i]];
-    memset(QFragPtr, 0, 64*sizeof(Q_LIST_ENTRY));
-  }
-}
-
-static void loop_filter_h(unsigned char * PixelPtr,
-                          ogg_int32_t LineLength,
-                          ogg_int16_t *BoundingValuePtr){
-  ogg_int32_t j;
-  ogg_int32_t FiltVal;
-  PixelPtr-=2;
-
-  for ( j = 0; j < 8; j++ ){
-    FiltVal =
-      ( PixelPtr[0] ) -
-      ( PixelPtr[1] * 3 ) +
-      ( PixelPtr[2] * 3 ) -
-      ( PixelPtr[3] );
-
-    FiltVal = *(BoundingValuePtr+((FiltVal + 4) >> 3));
-
-    PixelPtr[1] = clamp255(PixelPtr[1] + FiltVal);
-    PixelPtr[2] = clamp255(PixelPtr[2] - FiltVal);
-
-    PixelPtr += LineLength;
-  }
-}
-
-static void loop_filter_v(unsigned char * PixelPtr,
-                          ogg_int32_t LineLength,
-                          ogg_int16_t *BoundingValuePtr){
-  ogg_int32_t j;
-  ogg_int32_t FiltVal;
-  PixelPtr -= 2*LineLength;
-
-  for ( j = 0; j < 8; j++ ) {
-    FiltVal = ( (ogg_int32_t)PixelPtr[0] ) -
-      ( (ogg_int32_t)PixelPtr[LineLength] * 3 ) +
-      ( (ogg_int32_t)PixelPtr[2 * LineLength] * 3 ) -
-      ( (ogg_int32_t)PixelPtr[3 * LineLength] );
-
-    FiltVal = *(BoundingValuePtr+((FiltVal + 4) >> 3));
-
-    PixelPtr[LineLength] = clamp255(PixelPtr[LineLength] + FiltVal);
-    PixelPtr[2 * LineLength] = clamp255(PixelPtr[2*LineLength] - FiltVal);
-
-    PixelPtr ++;
-  }
-}
-
-static void LoopFilter__c(PB_INSTANCE *pbi, int FLimit){
-
-  int j;
-  ogg_int16_t BoundingValues[256];
-  ogg_int16_t *bvp = BoundingValues+127;
-  unsigned char *cp = pbi->display_fragments;
-  ogg_uint32_t *bp = pbi->recon_pixel_index_table;
-
-  if ( FLimit == 0 ) return;
-  SetupBoundingValueArray_Generic(BoundingValues, FLimit);
-
-  for ( j = 0; j < 3 ; j++){
-    ogg_uint32_t *bp_begin = bp;
-    ogg_uint32_t *bp_end;
-    int stride;
-    int h;
-
-    switch(j) {
-    case 0: /* y */
-      bp_end = bp + pbi->YPlaneFragments;
-      h = pbi->HFragments;
-      stride = pbi->YStride;
-      break;
-    default: /* u,v, 4:20 specific */
-      bp_end = bp + pbi->UVPlaneFragments;
-      h = pbi->HFragments >> 1;
-      stride = pbi->UVStride;
-      break;
-    }
-
-    while(bp<bp_end){
-      ogg_uint32_t *bp_left = bp;
-      ogg_uint32_t *bp_right = bp + h;
-      while(bp<bp_right){
-        if(cp[0]){
-          if(bp>bp_left)
-            loop_filter_h(&pbi->LastFrameRecon[bp[0]],stride,bvp);
-          if(bp_left>bp_begin)
-            loop_filter_v(&pbi->LastFrameRecon[bp[0]],stride,bvp);
-          if(bp+1<bp_right && !cp[1])
-            loop_filter_h(&pbi->LastFrameRecon[bp[0]]+8,stride,bvp);
-          if(bp+h<bp_end && !cp[h])
-            loop_filter_v(&pbi->LastFrameRecon[bp[h]],stride,bvp);
-        }
-        bp++;
-        cp++;
-      }
-    }
-  }
-}
-
-void ReconRefFrames (PB_INSTANCE *pbi){
-  ogg_int32_t i;
-  unsigned char *SwapReconBuffersTemp;
-
-  /* predictor multiplier up-left, up, up-right,left, shift
-     Entries are packed in the order L, UL, U, UR, with missing entries
-      moved to the end (before the shift parameters). */
-  static const ogg_int16_t pc[16][6]={
-    {0,0,0,0,0,0},
-    {1,0,0,0,0,0},      /* PL */
-    {1,0,0,0,0,0},      /* PUL */
-    {1,0,0,0,0,0},      /* PUL|PL */
-    {1,0,0,0,0,0},      /* PU */
-    {1,1,0,0,1,1},      /* PU|PL */
-    {0,1,0,0,0,0},      /* PU|PUL */
-    {29,-26,29,0,5,31}, /* PU|PUL|PL */
-    {1,0,0,0,0,0},      /* PUR */
-    {75,53,0,0,7,127},  /* PUR|PL */
-    {1,1,0,0,1,1},      /* PUR|PUL */
-    {75,0,53,0,7,127},  /* PUR|PUL|PL */
-    {1,0,0,0,0,0},      /* PUR|PU */
-    {75,0,53,0,7,127},  /* PUR|PU|PL */
-    {3,10,3,0,4,15},    /* PUR|PU|PUL */
-    {29,-26,29,0,5,31}  /* PUR|PU|PUL|PL */
-  };
-
-  /* boundary case bit masks. */
-  static const int bc_mask[8]={
-    /* normal case no boundary condition */
-    PUR|PU|PUL|PL,
-    /* left column */
-    PUR|PU,
-    /* top row */
-    PL,
-    /* top row, left column */
-    0,
-    /* right column */
-    PU|PUL|PL,
-    /* right and left column */
-    PU,
-    /* top row, right column */
-    PL,
-    /* top row, right and left column */
-    0
-  };
-
-  /* value left value up-left, value up, value up-right, missing
-      values skipped. */
-  int v[4];
-
-  /* fragment number left, up-left, up, up-right */
-  int fn[4];
-
-  /* predictor count. */
-  int pcount;
-
-  short wpc;
-  static const short Mode2Frame[] = {
-    1,  /* CODE_INTER_NO_MV     0 => Encoded diff from same MB last frame  */
-    0,  /* CODE_INTRA           1 => DCT Encoded Block */
-    1,  /* CODE_INTER_PLUS_MV   2 => Encoded diff from included MV MB last frame */
-    1,  /* CODE_INTER_LAST_MV   3 => Encoded diff from MRU MV MB last frame */
-    1,  /* CODE_INTER_PRIOR_MV  4 => Encoded diff from included 4 separate MV blocks */
-    2,  /* CODE_USING_GOLDEN    5 => Encoded diff from same MB golden frame */
-    2,  /* CODE_GOLDEN_MV       6 => Encoded diff from included MV MB golden frame */
-    1   /* CODE_INTER_FOUR_MV   7 => Encoded diff from included 4 separate MV blocks */
-  };
-  short Last[3];
-  short PredictedDC;
-  int FragsAcross=pbi->HFragments;
-  int FromFragment,ToFragment;
-  int FragsDown = pbi->VFragments;
-
-  int WhichFrame;
-  int WhichCase;
-  int j,k,m,n;
-
-  void (*ExpandBlockA) ( PB_INSTANCE *pbi, ogg_int32_t FragmentNumber );
-
-  if ( pbi->FrameType == KEY_FRAME )
-    ExpandBlockA=ExpandKFBlock;
-  else
-    ExpandBlockA=ExpandBlock;
-
-  /* for y,u,v */
-  for ( j = 0; j < 3 ; j++) {
-    /* pick which fragments based on Y, U, V */
-    switch(j){
-    case 0: /* y */
-      FromFragment = 0;
-      ToFragment = pbi->YPlaneFragments;
-      FragsAcross = pbi->HFragments;
-      FragsDown = pbi->VFragments;
-      break;
-    case 1: /* u */
-      FromFragment = pbi->YPlaneFragments;
-      ToFragment = pbi->YPlaneFragments + pbi->UVPlaneFragments ;
-      FragsAcross = pbi->HFragments >> 1;
-      FragsDown = pbi->VFragments >> 1;
-      break;
-    /*case 2:  v */
-    default:
-      FromFragment = pbi->YPlaneFragments + pbi->UVPlaneFragments;
-      ToFragment = pbi->YPlaneFragments + (2 * pbi->UVPlaneFragments) ;
-      FragsAcross = pbi->HFragments >> 1;
-      FragsDown = pbi->VFragments >> 1;
-      break;
-    }
-
-    /* initialize our array of last used DC Components */
-    for(k=0;k<3;k++)
-      Last[k]=0;
-
-    i=FromFragment;
-
-    /* do prediction on all of Y, U or V */
-    for ( m = 0 ; m < FragsDown ; m++) {
-      for ( n = 0 ; n < FragsAcross ; n++, i++){
-
-        /* only do 2 prediction if fragment coded and on non intra or
-           if all fragments are intra */
-        if( pbi->display_fragments[i] || (pbi->FrameType == KEY_FRAME) ){
-          /* Type of Fragment */
-          WhichFrame = Mode2Frame[pbi->FragCodingMethod[i]];
-
-          /* Check Borderline Cases */
-          WhichCase = (n==0) + ((m==0) << 1) + ((n+1 == FragsAcross) << 2);
-
-          fn[0]=i-1;
-          fn[1]=i-FragsAcross-1;
-          fn[2]=i-FragsAcross;
-          fn[3]=i-FragsAcross+1;
-
-          /* fragment valid for prediction use if coded and it comes
-             from same frame as the one we are predicting */
-          for(k=pcount=wpc=0; k<4; k++) {
-            int pflag;
-            pflag=1<<k;
-            if((bc_mask[WhichCase]&pflag) &&
-               pbi->display_fragments[fn[k]] &&
-               (Mode2Frame[pbi->FragCodingMethod[fn[k]]] == WhichFrame)){
-              v[pcount]=pbi->QFragData[fn[k]][0];
-              wpc|=pflag;
-              pcount++;
-            }
-          }
-
-          if(wpc==0){
-            /* fall back to the last coded fragment */
-            pbi->QFragData[i][0] += Last[WhichFrame];
-
-          }else{
-
-            /* don't do divide if divisor is 1 or 0 */
-            PredictedDC = pc[wpc][0]*v[0];
-            for(k=1; k<pcount; k++){
-              PredictedDC += pc[wpc][k]*v[k];
-            }
-
-            /* if we need to do a shift */
-            if(pc[wpc][4] != 0 ){
-
-              /* If negative add in the negative correction factor */
-              PredictedDC += (HIGHBITDUPPED(PredictedDC) & pc[wpc][5]);
-
-              /* Shift in lieu of a divide */
-              PredictedDC >>= pc[wpc][4];
-            }
-
-            /* check for outranging on the two predictors that can outrange */
-            if((wpc&(PU|PUL|PL)) == (PU|PUL|PL)){
-              if( abs(PredictedDC - v[2]) > 128) {
-                PredictedDC = v[2];
-              } else if( abs(PredictedDC - v[0]) > 128) {
-                PredictedDC = v[0];
-              } else if( abs(PredictedDC - v[1]) > 128) {
-                PredictedDC = v[1];
-              }
-            }
-
-            pbi->QFragData[i][0] += PredictedDC;
-
-          }
-
-          /* Save the last fragment coded for whatever frame we are
-             predicting from */
-          Last[WhichFrame] = pbi->QFragData[i][0];
-
-          /* Inverse DCT and reconstitute buffer in thisframe */
-          ExpandBlockA( pbi, i );
-        }
-      }
-    }
-  }
-
-  /* Copy the current reconstruction back to the last frame recon buffer. */
-  if(pbi->CodedBlockIndex > (ogg_int32_t) (pbi->UnitFragments >> 1)){
-    SwapReconBuffersTemp = pbi->ThisFrameRecon;
-    pbi->ThisFrameRecon = pbi->LastFrameRecon;
-    pbi->LastFrameRecon = SwapReconBuffersTemp;
-    CopyNotRecon( pbi, pbi->LastFrameRecon, pbi->ThisFrameRecon );
-  }else{
-    CopyRecon( pbi, pbi->LastFrameRecon, pbi->ThisFrameRecon );
-  }
-
-  /* Apply a loop filter to edge pixels of updated blocks */
-  dsp_LoopFilter(pbi->dsp, pbi, pbi->quant_info.loop_filter_limits[pbi->FrameQIndex]);
-
-  /* We may need to update the UMV border */
-  UpdateUMVBorder(pbi, pbi->LastFrameRecon);
-
-  /* Reconstruct the golden frame if necessary.
-     For VFW codec only on key frames */
-  if ( pbi->FrameType == KEY_FRAME ){
-    CopyRecon( pbi, pbi->GoldenFrame, pbi->LastFrameRecon );
-    /* We may need to update the UMV border */
-    UpdateUMVBorder(pbi, pbi->GoldenFrame);
-  }
-}
-
-void dsp_dct_decode_init (DspFunctions *funcs, ogg_uint32_t cpu_flags)
-{
-  funcs->LoopFilter = LoopFilter__c;
-#if defined(USE_ASM)
-  // Todo: Port the dct for MSC one day.
-#if !defined (_MSC_VER)
-  if (cpu_flags & OC_CPU_X86_MMX) {
-    dsp_mmx_dct_decode_init(funcs);
-  }
-#endif
-#endif
-}

+ 0 - 469
Engine/lib/libtheora/lib/enc/dct_encode.c

@@ -1,469 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: dct_encode.c 15153 2008-08-04 18:37:55Z tterribe $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include "codec_internal.h"
-#include "dsp.h"
-#include "quant_lookup.h"
-
-
-static int ModeUsesMC[MAX_MODES] = { 0, 0, 1, 1, 1, 0, 1, 1 };
-
-static unsigned char TokenizeDctValue (ogg_int16_t DataValue,
-                                       ogg_uint32_t * TokenListPtr ){
-  unsigned char tokens_added = 0;
-  ogg_uint32_t AbsDataVal = abs( (ogg_int32_t)DataValue );
-
-  /* Values are tokenised as category value and a number of additional
-     bits that define the position within the category.  */
-
-  if ( DataValue == 0 ) return 0;
-
-  if ( AbsDataVal == 1 ){
-    if ( DataValue == 1 )
-      TokenListPtr[0] = ONE_TOKEN;
-    else
-      TokenListPtr[0] = MINUS_ONE_TOKEN;
-    tokens_added = 1;
-  } else if ( AbsDataVal == 2 ) {
-    if ( DataValue == 2 )
-      TokenListPtr[0] = TWO_TOKEN;
-    else
-      TokenListPtr[0] = MINUS_TWO_TOKEN;
-    tokens_added = 1;
-  } else if ( AbsDataVal <= MAX_SINGLE_TOKEN_VALUE ) {
-    TokenListPtr[0] = LOW_VAL_TOKENS + (AbsDataVal - DCT_VAL_CAT2_MIN);
-    if ( DataValue > 0 )
-      TokenListPtr[1] = 0;
-    else
-      TokenListPtr[1] = 1;
-    tokens_added = 2;
-  } else if ( AbsDataVal <= 8 ) {
-    /* Bit 1 determines sign, Bit 0 the value */
-    TokenListPtr[0] = DCT_VAL_CATEGORY3;
-    if ( DataValue > 0 )
-      TokenListPtr[1] = (AbsDataVal - DCT_VAL_CAT3_MIN);
-    else
-      TokenListPtr[1] = (0x02) + (AbsDataVal - DCT_VAL_CAT3_MIN);
-    tokens_added = 2;
-  } else if ( AbsDataVal <= 12 ) {
-    /* Bit 2 determines sign, Bit 0-2 the value */
-    TokenListPtr[0] = DCT_VAL_CATEGORY4;
-    if ( DataValue > 0 )
-      TokenListPtr[1] = (AbsDataVal - DCT_VAL_CAT4_MIN);
-    else
-      TokenListPtr[1] = (0x04) + (AbsDataVal - DCT_VAL_CAT4_MIN);
-    tokens_added = 2;
-  } else if ( AbsDataVal <= 20 ) {
-    /* Bit 3 determines sign, Bit 0-2 the value */
-    TokenListPtr[0] = DCT_VAL_CATEGORY5;
-    if ( DataValue > 0 )
-      TokenListPtr[1] = (AbsDataVal - DCT_VAL_CAT5_MIN);
-    else
-      TokenListPtr[1] = (0x08) + (AbsDataVal - DCT_VAL_CAT5_MIN);
-    tokens_added = 2;
-  } else if ( AbsDataVal <= 36 ) {
-    /* Bit 4 determines sign, Bit 0-3 the value */
-    TokenListPtr[0] = DCT_VAL_CATEGORY6;
-    if ( DataValue > 0 )
-      TokenListPtr[1] = (AbsDataVal - DCT_VAL_CAT6_MIN);
-    else
-      TokenListPtr[1] = (0x010) + (AbsDataVal - DCT_VAL_CAT6_MIN);
-    tokens_added = 2;
-  } else if ( AbsDataVal <= 68 ) {
-    /* Bit 5 determines sign, Bit 0-4 the value */
-    TokenListPtr[0] = DCT_VAL_CATEGORY7;
-    if ( DataValue > 0 )
-      TokenListPtr[1] = (AbsDataVal - DCT_VAL_CAT7_MIN);
-    else
-      TokenListPtr[1] = (0x20) + (AbsDataVal - DCT_VAL_CAT7_MIN);
-    tokens_added = 2;
-  } else if ( AbsDataVal <= 511 ) {
-    /* Bit 9 determines sign, Bit 0-8 the value */
-    TokenListPtr[0] = DCT_VAL_CATEGORY8;
-    if ( DataValue > 0 )
-      TokenListPtr[1] = (AbsDataVal - DCT_VAL_CAT8_MIN);
-    else
-      TokenListPtr[1] = (0x200) + (AbsDataVal - DCT_VAL_CAT8_MIN);
-    tokens_added = 2;
-  } else {
-    TokenListPtr[0] = DCT_VAL_CATEGORY8;
-    if ( DataValue > 0 )
-      TokenListPtr[1] = (511 - DCT_VAL_CAT8_MIN);
-    else
-      TokenListPtr[1] = (0x200) + (511 - DCT_VAL_CAT8_MIN);
-    tokens_added = 2;
-  }
-
-  /* Return the total number of tokens added */
-  return tokens_added;
-}
-
-static unsigned char TokenizeDctRunValue (unsigned char RunLength,
-                                          ogg_int16_t DataValue,
-                                          ogg_uint32_t * TokenListPtr ){
-  unsigned char tokens_added = 0;
-  ogg_uint32_t AbsDataVal = abs( (ogg_int32_t)DataValue );
-
-  /* Values are tokenised as category value and a number of additional
-     bits  that define the category.  */
-  if ( DataValue == 0 ) return 0;
-  if ( AbsDataVal == 1 ) {
-    /* Zero runs of 1-5 */
-    if ( RunLength <= 5 ) {
-      TokenListPtr[0] = DCT_RUN_CATEGORY1 + (RunLength - 1);
-      if ( DataValue > 0 )
-        TokenListPtr[1] = 0;
-      else
-        TokenListPtr[1] = 1;
-    } else if ( RunLength <= 9 ) {
-      /* Zero runs of 6-9 */
-      TokenListPtr[0] = DCT_RUN_CATEGORY1B;
-      if ( DataValue > 0 )
-        TokenListPtr[1] = (RunLength - 6);
-      else
-        TokenListPtr[1] = 0x04 + (RunLength - 6);
-    } else {
-      /* Zero runs of 10-17 */
-      TokenListPtr[0] = DCT_RUN_CATEGORY1C;
-      if ( DataValue > 0 )
-        TokenListPtr[1] = (RunLength - 10);
-      else
-        TokenListPtr[1] = 0x08 + (RunLength - 10);
-    }
-    tokens_added = 2;
-  } else if ( AbsDataVal <= 3 ) {
-    if ( RunLength == 1 ) {
-      TokenListPtr[0] = DCT_RUN_CATEGORY2;
-
-      /* Extra bits token bit 1 indicates sign, bit 0 indicates value */
-      if ( DataValue > 0 )
-        TokenListPtr[1] = (AbsDataVal - 2);
-      else
-        TokenListPtr[1] = (0x02) + (AbsDataVal - 2);
-      tokens_added = 2;
-    }else{
-      TokenListPtr[0] = DCT_RUN_CATEGORY2 + 1;
-
-      /* Extra bits token. */
-      /* bit 2 indicates sign, bit 1 indicates value, bit 0 indicates
-         run length */
-      if ( DataValue > 0 )
-        TokenListPtr[1] = ((AbsDataVal - 2) << 1) + (RunLength - 2);
-      else
-        TokenListPtr[1] = (0x04) + ((AbsDataVal - 2) << 1) + (RunLength - 2);
-      tokens_added = 2;
-    }
-  } else  {
-    tokens_added = 2;  /* ERROR */
-    /*IssueWarning( "Bad Input to TokenizeDctRunValue" );*/
-  }
-
-  /* Return the total number of tokens added */
-  return tokens_added;
-}
-
-static unsigned char TokenizeDctBlock (ogg_int16_t * RawData,
-                                       ogg_uint32_t * TokenListPtr ) {
-  ogg_uint32_t i;
-  unsigned char  run_count;
-  unsigned char  token_count = 0;     /* Number of tokens crated. */
-  ogg_uint32_t AbsData;
-
-
-  /* Tokenize the block */
-  for( i = 0; i < BLOCK_SIZE; i++ ){
-    run_count = 0;
-
-    /* Look for a zero run.  */
-    /* NOTE the use of & instead of && which is faster (and
-       equivalent) in this instance. */
-    /* NO, NO IT ISN'T --Monty */
-    while( (i < BLOCK_SIZE) && (!RawData[i]) ){
-      run_count++;
-      i++;
-    }
-
-    /* If we have reached the end of the block then code EOB */
-    if ( i == BLOCK_SIZE ){
-      TokenListPtr[token_count] = DCT_EOB_TOKEN;
-      token_count++;
-    }else{
-      /* If we have a short zero run followed by a low data value code
-         the two as a composite token. */
-      if ( run_count ){
-        AbsData = abs(RawData[i]);
-
-        if ( ((AbsData == 1) && (run_count <= 17)) ||
-             ((AbsData <= 3) && (run_count <= 3)) ) {
-          /* Tokenise the run and subsequent value combination value */
-          token_count += TokenizeDctRunValue( run_count,
-                                              RawData[i],
-                                              &TokenListPtr[token_count] );
-        }else{
-
-        /* Else if we have a long non-EOB run or a run followed by a
-           value token > MAX_RUN_VAL then code the run and token
-           seperately */
-          if ( run_count <= 8 )
-            TokenListPtr[token_count] = DCT_SHORT_ZRL_TOKEN;
-          else
-            TokenListPtr[token_count] = DCT_ZRL_TOKEN;
-
-          token_count++;
-          TokenListPtr[token_count] = run_count - 1;
-          token_count++;
-
-          /* Now tokenize the value */
-          token_count += TokenizeDctValue( RawData[i],
-                                           &TokenListPtr[token_count] );
-        }
-      }else{
-        /* Else there was NO zero run. */
-        /* Tokenise the value  */
-        token_count += TokenizeDctValue( RawData[i],
-                                         &TokenListPtr[token_count] );
-      }
-    }
-  }
-
-  /* Return the total number of tokens (including additional bits
-     tokens) used. */
-  return token_count;
-}
-
-ogg_uint32_t DPCMTokenizeBlock (CP_INSTANCE *cpi,
-                                ogg_int32_t FragIndex){
-  ogg_uint32_t  token_count;
-
-  if ( cpi->pb.FrameType == KEY_FRAME ){
-    /* Key frame so code block in INTRA mode. */
-    cpi->pb.CodingMode = CODE_INTRA;
-  }else{
-    /* Get Motion vector and mode for this block. */
-    cpi->pb.CodingMode = cpi->pb.FragCodingMethod[FragIndex];
-  }
-
-  /* Tokenise the dct data. */
-  token_count = TokenizeDctBlock( cpi->pb.QFragData[FragIndex],
-                                  cpi->pb.TokenList[FragIndex] );
-
-  cpi->FragTokenCounts[FragIndex] = token_count;
-  cpi->TotTokenCount += token_count;
-
-  /* Return number of pixels coded (i.e. 8x8). */
-  return BLOCK_SIZE;
-}
-
-static int AllZeroDctData( Q_LIST_ENTRY * QuantList ){
-  ogg_uint32_t i;
-
-  for ( i = 0; i < 64; i ++ )
-    if ( QuantList[i] != 0 )
-      return 0;
-
-  return 1;
-}
-
-static void MotionBlockDifference (CP_INSTANCE * cpi, unsigned char * FiltPtr,
-                            ogg_int16_t *DctInputPtr, ogg_int32_t MvDevisor,
-                            unsigned char* old_ptr1, unsigned char* new_ptr1,
-                            ogg_uint32_t FragIndex,ogg_uint32_t PixelsPerLine,
-                            ogg_uint32_t ReconPixelsPerLine) {
-
-  ogg_int32_t MvShift;
-  ogg_int32_t MvModMask;
-  ogg_int32_t  AbsRefOffset;
-  ogg_int32_t  AbsXOffset;
-  ogg_int32_t  AbsYOffset;
-  ogg_int32_t  MVOffset;        /* Baseline motion vector offset */
-  ogg_int32_t  ReconPtr2Offset; /* Offset for second reconstruction in
-                                   half pixel MC */
-  unsigned char  *ReconPtr1;    /* DCT reconstructed image pointers */
-  unsigned char  *ReconPtr2;    /* Pointer used in half pixel MC */
-
-  switch(MvDevisor) {
-  case 2:
-    MvShift = 1;
-    MvModMask = 1;
-    break;
-  case 4:
-    MvShift = 2;
-    MvModMask = 3;
-    break;
-  default:
-    break;
-  }
-
-  cpi->MVector.x = cpi->pb.FragMVect[FragIndex].x;
-  cpi->MVector.y = cpi->pb.FragMVect[FragIndex].y;
-
-  /* Set up the baseline offset for the motion vector. */
-  MVOffset = ((cpi->MVector.y / MvDevisor) * ReconPixelsPerLine) +
-    (cpi->MVector.x / MvDevisor);
-
-  /* Work out the offset of the second reference position for 1/2
-     pixel interpolation.  For the U and V planes the MV specifies 1/4
-     pixel accuracy. This is adjusted to 1/2 pixel as follows ( 0->0,
-     1/4->1/2, 1/2->1/2, 3/4->1/2 ). */
-  ReconPtr2Offset = 0;
-  AbsXOffset = cpi->MVector.x % MvDevisor;
-  AbsYOffset = cpi->MVector.y % MvDevisor;
-
-  if ( AbsXOffset ) {
-    if ( cpi->MVector.x > 0 )
-      ReconPtr2Offset += 1;
-    else
-      ReconPtr2Offset -= 1;
-  }
-
-  if ( AbsYOffset ) {
-    if ( cpi->MVector.y > 0 )
-      ReconPtr2Offset += ReconPixelsPerLine;
-    else
-      ReconPtr2Offset -= ReconPixelsPerLine;
-  }
-
-  if ( cpi->pb.CodingMode==CODE_GOLDEN_MV ) {
-    ReconPtr1 = &cpi->
-      pb.GoldenFrame[cpi->pb.recon_pixel_index_table[FragIndex]];
-  } else {
-    ReconPtr1 = &cpi->
-      pb.LastFrameRecon[cpi->pb.recon_pixel_index_table[FragIndex]];
-  }
-
-  ReconPtr1 += MVOffset;
-  ReconPtr2 =  ReconPtr1 + ReconPtr2Offset;
-
-  AbsRefOffset = abs((int)(ReconPtr1 - ReconPtr2));
-
-  /* Is the MV offset exactly pixel alligned */
-  if ( AbsRefOffset == 0 ){
-    dsp_sub8x8(cpi->dsp, FiltPtr, ReconPtr1, DctInputPtr,
-               PixelsPerLine, ReconPixelsPerLine);
-    dsp_copy8x8 (cpi->dsp, new_ptr1, old_ptr1, PixelsPerLine);
-  } else {
-    /* Fractional pixel MVs. */
-    /* Note that we only use two pixel values even for the diagonal */
-    dsp_sub8x8avg2(cpi->dsp, FiltPtr, ReconPtr1,ReconPtr2,DctInputPtr,
-                 PixelsPerLine, ReconPixelsPerLine);
-    dsp_copy8x8 (cpi->dsp, new_ptr1, old_ptr1, PixelsPerLine);
-  }
-}
-
-void TransformQuantizeBlock (CP_INSTANCE *cpi, ogg_int32_t FragIndex,
-                             ogg_uint32_t PixelsPerLine) {
-  unsigned char *new_ptr1;    /* Pointers into current frame */
-  unsigned char *old_ptr1;    /* Pointers into old frame */
-  unsigned char *FiltPtr;     /* Pointers to srf filtered pixels */
-  ogg_int16_t   *DctInputPtr; /* Pointer into buffer containing input to DCT */
-  int LeftEdge;               /* Flag if block at left edge of component */
-  ogg_uint32_t  ReconPixelsPerLine; /* Line length for recon buffers. */
-
-  unsigned char   *ReconPtr1;   /* DCT reconstructed image pointers */
-  ogg_int32_t   MvDevisor;      /* Defines MV resolution (2 = 1/2
-                                   pixel for Y or 4 = 1/4 for UV) */
-
-  new_ptr1 = &cpi->yuv1ptr[cpi->pb.pixel_index_table[FragIndex]];
-  old_ptr1 = &cpi->yuv0ptr[cpi->pb.pixel_index_table[FragIndex]];
-  DctInputPtr   = cpi->DCTDataBuffer;
-
-  /* Set plane specific values */
-  if (FragIndex < (ogg_int32_t)cpi->pb.YPlaneFragments){
-    ReconPixelsPerLine = cpi->pb.YStride;
-    MvDevisor = 2;                  /* 1/2 pixel accuracy in Y */
-  }else{
-    ReconPixelsPerLine = cpi->pb.UVStride;
-    MvDevisor = 4;                  /* UV planes at 1/2 resolution of Y */
-  }
-
-  /* adjusted / filtered pointers */
-  FiltPtr = &cpi->ConvDestBuffer[cpi->pb.pixel_index_table[FragIndex]];
-
-  if ( cpi->pb.FrameType == KEY_FRAME ) {
-    /* Key frame so code block in INTRA mode. */
-    cpi->pb.CodingMode = CODE_INTRA;
-  }else{
-    /* Get Motion vector and mode for this block. */
-    cpi->pb.CodingMode = cpi->pb.FragCodingMethod[FragIndex];
-  }
-
-  /* Selection of Quantiser matrix and set other plane related values. */
-  if ( FragIndex < (ogg_int32_t)cpi->pb.YPlaneFragments ){
-    LeftEdge = !(FragIndex%cpi->pb.HFragments);
-
-    /* Select the appropriate Y quantiser matrix */
-    if ( cpi->pb.CodingMode == CODE_INTRA )
-      select_quantiser(&cpi->pb, BLOCK_Y);
-    else
-      select_quantiser(&cpi->pb, BLOCK_INTER_Y);
-  } else {
-    LeftEdge = !((FragIndex-cpi->pb.YPlaneFragments)%(cpi->pb.HFragments>>1));
-
-    if(FragIndex < (ogg_int32_t)cpi->pb.YPlaneFragments + (ogg_int32_t)cpi->pb.UVPlaneFragments) {
-      /* U plane */
-      if ( cpi->pb.CodingMode == CODE_INTRA )
-        select_quantiser(&cpi->pb, BLOCK_U);
-      else
-        select_quantiser(&cpi->pb, BLOCK_INTER_U);
-    } else {
-      /* V plane */
-      if ( cpi->pb.CodingMode == CODE_INTRA )
-        select_quantiser(&cpi->pb, BLOCK_V);
-      else
-        select_quantiser(&cpi->pb, BLOCK_INTER_V);
-    }
-  }
-
-  if ( ModeUsesMC[cpi->pb.CodingMode] ){
-
-    MotionBlockDifference(cpi, FiltPtr, DctInputPtr, MvDevisor,
-                          old_ptr1, new_ptr1, FragIndex, PixelsPerLine,
-                          ReconPixelsPerLine);
-
-  } else if ( (cpi->pb.CodingMode==CODE_INTER_NO_MV ) ||
-              ( cpi->pb.CodingMode==CODE_USING_GOLDEN ) ) {
-    if ( cpi->pb.CodingMode==CODE_INTER_NO_MV ) {
-      ReconPtr1 = &cpi->
-        pb.LastFrameRecon[cpi->pb.recon_pixel_index_table[FragIndex]];
-    } else {
-      ReconPtr1 = &cpi->
-        pb.GoldenFrame[cpi->pb.recon_pixel_index_table[FragIndex]];
-    }
-
-    dsp_sub8x8(cpi->dsp, FiltPtr, ReconPtr1, DctInputPtr,
-               PixelsPerLine, ReconPixelsPerLine);
-    dsp_copy8x8 (cpi->dsp, new_ptr1, old_ptr1, PixelsPerLine);
-  } else if ( cpi->pb.CodingMode==CODE_INTRA ) {
-    dsp_sub8x8_128(cpi->dsp, FiltPtr, DctInputPtr, PixelsPerLine);
-    dsp_copy8x8 (cpi->dsp, new_ptr1, old_ptr1, PixelsPerLine);
-  }
-
-  /* Proceed to encode the data into the encode buffer if the encoder
-     is enabled. */
-  /* Perform a 2D DCT transform on the data. */
-  dsp_fdct_short(cpi->dsp, cpi->DCTDataBuffer, cpi->DCT_codes );
-
-  /* Quantize that transform data. */
-  quantize ( &cpi->pb, cpi->DCT_codes, cpi->pb.QFragData[FragIndex] );
-
-  if ( (cpi->pb.CodingMode == CODE_INTER_NO_MV) &&
-       ( AllZeroDctData(cpi->pb.QFragData[FragIndex]) ) ) {
-    cpi->pb.display_fragments[FragIndex] = 0;
-  }
-
-}

+ 0 - 422
Engine/lib/libtheora/lib/enc/dsp.c

@@ -1,422 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: dsp.c 15427 2008-10-21 02:36:19Z xiphmont $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include "codec_internal.h"
-#include "../cpu.c"
-
-#define DSP_OP_AVG(a,b) ((((int)(a)) + ((int)(b)))/2) /* mean of a and b in int arithmetic; the /2 truncates */
-#define DSP_OP_DIFF(a,b) (((int)(a)) - ((int)(b))) /* signed difference a-b after widening both operands to int */
-#define DSP_OP_ABS_DIFF(a,b) abs((((int)(a)) - ((int)(b)))) /* |a-b| computed in int via abs() */
-
-/* Plain C residual kernel: for every pixel of an 8x8 block store
-   FiltPtr[x] - ReconPtr[x] into DctInputPtr.
-   FiltPtr steps by PixelsPerLine per row, ReconPtr steps by
-   ReconPixelsPerLine per row, and the output is written as 8 rows of
-   8 consecutive ogg_int16_t values. */
-static void sub8x8__c (unsigned char *FiltPtr, unsigned char *ReconPtr,
-                  ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine,
-                  ogg_uint32_t ReconPixelsPerLine) {
-  int row;
-  int col;
-
-  for (row = 0; row < 8; row++) {
-    for (col = 0; col < 8; col++) {
-      DctInputPtr[col] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[col], ReconPtr[col]);
-    }
-
-    /* Move every pointer to the start of its next row. */
-    FiltPtr += PixelsPerLine;
-    ReconPtr += ReconPixelsPerLine;
-    DctInputPtr += 8;
-  }
-}
-
-/* INTRA residual kernel: copy an 8x8 block of pixels into DctInputPtr
-   with 128 subtracted from each sample, which (as the original note
-   says) reduces the precision the DCT needs internally.
-   FiltPtr steps by PixelsPerLine per row; the output is 8 rows of 8
-   consecutive ogg_int16_t values. */
-static void sub8x8_128__c (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr,
-                      ogg_uint32_t PixelsPerLine) {
-  int row;
-  int col;
-
-  for (row = 0; row < 8; row++) {
-    for (col = 0; col < 8; col++) {
-      DctInputPtr[col] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[col], 128);
-    }
-
-    /* Next source row, next output row. */
-    FiltPtr += PixelsPerLine;
-    DctInputPtr += 8;
-  }
-}
-
-/* Residual kernel against the average of two references: for every
-   pixel of an 8x8 block store
-   FiltPtr[x] - (ReconPtr1[x] + ReconPtr2[x])/2 into DctInputPtr.
-   Both reference pointers step by ReconPixelsPerLine per row and
-   FiltPtr steps by PixelsPerLine. */
-static void sub8x8avg2__c (unsigned char *FiltPtr, unsigned char *ReconPtr1,
-                     unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr,
-                     ogg_uint32_t PixelsPerLine,
-                     ogg_uint32_t ReconPixelsPerLine)
-{
-  int row;
-  int col;
-
-  for (row = 0; row < 8; row++) {
-    for (col = 0; col < 8; col++) {
-      DctInputPtr[col] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[col], DSP_OP_AVG (ReconPtr1[col], ReconPtr2[col]));
-    }
-
-    /* Move every pointer to the start of its next row. */
-    FiltPtr += PixelsPerLine;
-    ReconPtr1 += ReconPixelsPerLine;
-    ReconPtr2 += ReconPixelsPerLine;
-    DctInputPtr += 8;
-  }
-}
-
-/* Compare two rows of 8 pixels.  The sum of absolute differences is
-   taken separately over pixels 0..3 and pixels 4..7, and the larger of
-   the two partial sums is returned. */
-static ogg_uint32_t row_sad8__c (unsigned char *Src1, unsigned char *Src2)
-{
-  ogg_uint32_t low_half = 0;
-  ogg_uint32_t high_half = 0;
-  int x;
-
-  for (x = 0; x < 4; x++) {
-    low_half += DSP_OP_ABS_DIFF (Src1[x], Src2[x]);
-    high_half += DSP_OP_ABS_DIFF (Src1[x + 4], Src2[x + 4]);
-  }
-
-  if (low_half > high_half)
-    return low_half;
-  return high_half;
-}
-
-/* Column-wise comparison of two 8x8 blocks that share one stride.
-   For each of the 8 columns the absolute differences are summed over
-   rows 0..3 and, separately, over rows 4..7.  The largest of those 16
-   partial sums is returned. */
-static ogg_uint32_t col_sad8x8__c (unsigned char *Src1, unsigned char *Src2,
-                        ogg_uint32_t stride)
-{
-  /* col_sad[0] holds the upper four rows, col_sad[1] the lower four. */
-  ogg_uint32_t col_sad[2][8] = {{0,0,0,0,0,0,0,0},{0,0,0,0,0,0,0,0}};
-  ogg_uint32_t worst = 0;
-  int half;
-  int row;
-  int col;
-
-  for (half = 0; half < 2; half++) {
-    for (row = 0; row < 4; row++) {
-      for (col = 0; col < 8; col++) {
-        col_sad[half][col] += abs(Src1[col] - Src2[col]);
-      }
-      Src1 += stride;
-      Src2 += stride;
-    }
-  }
-
-  for (col = 0; col < 8; col++) {
-    if (col_sad[0][col] > worst)
-      worst = col_sad[0][col];
-    if (col_sad[1][col] > worst)
-      worst = col_sad[1][col];
-  }
-
-  return worst;
-}
-
-/* Sum of absolute differences over a full 8x8 block.  ptr1 steps by
-   stride1 per row and ptr2 by stride2. */
-static ogg_uint32_t sad8x8__c (unsigned char *ptr1, ogg_uint32_t stride1,
-                 unsigned char *ptr2, ogg_uint32_t stride2)
-{
-  ogg_uint32_t  total = 0;
-  int           row;
-  int           col;
-
-  for (row = 0; row < 8; row++) {
-    for (col = 0; col < 8; col++) {
-      total += DSP_OP_ABS_DIFF(ptr1[col], ptr2[col]);
-    }
-
-    /* On to the next row of each block. */
-    ptr1 += stride1;
-    ptr2 += stride2;
-  }
-
-  return total;
-}
-
-/* Sum of absolute differences over an 8x8 block with early exit.
-   After each complete row the running total is compared with thres;
-   as soon as it exceeds thres the loop stops and the partial total is
-   returned, so the result is exact only when it is <= thres. */
-static ogg_uint32_t sad8x8_thres__c (unsigned char *ptr1, ogg_uint32_t stride1,
-                 unsigned char *ptr2, ogg_uint32_t stride2,
-             ogg_uint32_t thres)
-{
-  ogg_uint32_t  total = 0;
-  int           row;
-  int           col;
-
-  for (row = 0; row < 8; row++) {
-    for (col = 0; col < 8; col++) {
-      total += DSP_OP_ABS_DIFF(ptr1[col], ptr2[col]);
-    }
-
-    /* Already over the limit: no need to look at further rows. */
-    if (total > thres)
-      break;
-
-    ptr1 += stride1;
-    ptr2 += stride2;
-  }
-
-  return total;
-}
-
-/* Thresholded SAD of an 8x8 source block against the per-pixel average
-   (truncating) of two reference blocks.  Both references step by
-   RefStride per row, the source by SrcStride.  The running total is
-   checked against thres after each row and the partial total is
-   returned once it exceeds thres. */
-static ogg_uint32_t sad8x8_xy2_thres__c (unsigned char *SrcData, ogg_uint32_t SrcStride,
-                          unsigned char *RefDataPtr1,
-                    unsigned char *RefDataPtr2, ogg_uint32_t RefStride,
-                    ogg_uint32_t thres)
-{
-  ogg_uint32_t  total = 0;
-  int           row;
-  int           col;
-
-  for (row = 0; row < 8; row++) {
-    for (col = 0; col < 8; col++) {
-      total += DSP_OP_ABS_DIFF(SrcData[col], DSP_OP_AVG (RefDataPtr1[col], RefDataPtr2[col]));
-    }
-
-    /* Already over the limit: no need to look at further rows. */
-    if ( total > thres )
-      break;
-
-    SrcData += SrcStride;
-    RefDataPtr1 += RefStride;
-    RefDataPtr2 += RefStride;
-  }
-
-  return total;
-}
-
-/* Intra mismatch metric for an 8x8 block.  All 64 pixels are read;
-   with S = sum of pixels and SS = sum of squared pixels the function
-   returns 64*SS - S*S (the population variance scaled by 64*64),
-   evaluated in unsigned 32-bit arithmetic. */
-static ogg_uint32_t intra8x8_err__c (unsigned char *DataPtr, ogg_uint32_t Stride)
-{
-  ogg_uint32_t  sum = 0;
-  ogg_uint32_t  sum_sq = 0;
-  int           row;
-  int           col;
-
-  for (row = 0; row < 8; row++) {
-    for (col = 0; col < 8; col++) {
-      sum += DataPtr[col];
-      sum_sq += DataPtr[col]*DataPtr[col];
-    }
-
-    /* On to the next row of the block. */
-    DataPtr += Stride;
-  }
-
-  return (sum_sq<<6) - sum*sum;
-}
-
-/* Inter mismatch metric for an 8x8 block.  With d = source pixel minus
-   reference pixel, S = sum of d and SS = sum of d*d over all 64
-   pixels, the function returns 64*SS - S*S (the population variance of
-   the differences scaled by 64*64).  S is accumulated in an unsigned
-   32-bit variable, exactly as the result is computed. */
-static ogg_uint32_t inter8x8_err__c (unsigned char *SrcData, ogg_uint32_t SrcStride,
-                     unsigned char *RefDataPtr, ogg_uint32_t RefStride)
-{
-  ogg_uint32_t  sum = 0;
-  ogg_uint32_t  sum_sq = 0;
-  ogg_int32_t   delta;
-  int           row;
-  int           col;
-
-  for (row = 0; row < 8; row++) {
-    for (col = 0; col < 8; col++) {
-      delta = DSP_OP_DIFF (SrcData[col], RefDataPtr[col]);
-      sum += delta;
-      sum_sq += delta*delta;
-    }
-
-    /* On to the next row of each block. */
-    SrcData += SrcStride;
-    RefDataPtr += RefStride;
-  }
-
-  return (sum_sq<<6) - sum*sum;
-}
-
-/* Inter mismatch metric against the per-pixel average (truncating) of
-   two reference blocks.  With d = source pixel minus averaged
-   reference, S = sum of d and SS = sum of d*d over all 64 pixels, the
-   function returns 64*SS - S*S in unsigned 32-bit arithmetic.  Both
-   references step by RefStride per row, the source by SrcStride. */
-static ogg_uint32_t inter8x8_err_xy2__c (unsigned char *SrcData, ogg_uint32_t SrcStride,
-                         unsigned char *RefDataPtr1,
-             unsigned char *RefDataPtr2, ogg_uint32_t RefStride)
-{
-  ogg_uint32_t  sum = 0;
-  ogg_uint32_t  sum_sq = 0;
-  ogg_int32_t   delta;
-  int           row;
-  int           col;
-
-  for (row = 0; row < 8; row++) {
-    for (col = 0; col < 8; col++) {
-      delta = DSP_OP_DIFF(SrcData[col], DSP_OP_AVG (RefDataPtr1[col], RefDataPtr2[col]));
-      sum += delta;
-      sum_sq += delta*delta;
-    }
-
-    /* On to the next row of each block. */
-    SrcData += SrcStride;
-    RefDataPtr1 += RefStride;
-    RefDataPtr2 += RefStride;
-  }
-
-  return (sum_sq<<6) - sum*sum;
-}
-
-static void nop (void) { /* Intentionally empty: dsp_init() installs this as the default save_fpu and restore_fpu hook. */ }
-
-/* Fill the dispatch table with the portable C kernels defined in this
-   file.  Only the members listed here are written; every other member
-   of DspFunctions is left untouched. */
-void dsp_init(DspFunctions *funcs)
-{
-  /* The C kernels need no FPU state handling. */
-  funcs->restore_fpu = nop;
-  funcs->save_fpu = nop;
-
-  /* Residual (source minus prediction) kernels. */
-  funcs->sub8x8avg2 = sub8x8avg2__c;
-  funcs->sub8x8_128 = sub8x8_128__c;
-  funcs->sub8x8 = sub8x8__c;
-
-  /* Sum-of-absolute-difference kernels. */
-  funcs->sad8x8_xy2_thres = sad8x8_xy2_thres__c;
-  funcs->sad8x8_thres = sad8x8_thres__c;
-  funcs->sad8x8 = sad8x8__c;
-  funcs->col_sad8x8 = col_sad8x8__c;
-  funcs->row_sad8 = row_sad8__c;
-
-  /* Variance-style error metrics. */
-  funcs->inter8x8_err_xy2 = inter8x8_err_xy2__c;
-  funcs->inter8x8_err = inter8x8_err__c;
-  funcs->intra8x8_err = intra8x8_err__c;
-}
-
-/* Build the complete dispatch table for the running CPU.
-   The CPU flags are queried first, then the C kernels are installed
-   with dsp_init(), then dsp_recon_init() and dsp_dct_init() are given
-   the table and the flags, and finally the MMX / MMXEXT initialisers
-   are called when the build enables assembly and the CPU reports the
-   matching flag.  Later calls receive the same table, so they can
-   replace entries set by earlier ones. */
-void dsp_static_init(DspFunctions *funcs)
-{
-  ogg_uint32_t cpuflags;
-
-  cpuflags = oc_cpu_flags_get ();
-  dsp_init (funcs);
-
-  dsp_recon_init (funcs, cpuflags);
-  dsp_dct_init (funcs, cpuflags);
-  /* Same guard as the dsp_mmx_init/dsp_mmxext_init prototypes in dsp.h,
-     so that defining USE_ASM on a non-x86 target cannot reference
-     functions that were never declared. */
-#if defined(USE_ASM) && (defined(__i386__) || defined(__x86_64__) || defined(WIN32))
-  if (cpuflags & OC_CPU_X86_MMX) {
-    dsp_mmx_init(funcs);
-  }
-# ifndef WIN32
-  /* The MMXEXT kernels are not implemented for win32 yet. */
-  if (cpuflags & OC_CPU_X86_MMXEXT) {
-    dsp_mmxext_init(funcs);
-  }
-# endif
-#endif
-}
-

+ 0 - 166
Engine/lib/libtheora/lib/enc/dsp.h

@@ -1,166 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: dsp.h 15153 2008-08-04 18:37:55Z tterribe $
-
- ********************************************************************/
-
-#ifndef DSP_H
-#define DSP_H
-
-#include "theora/theora.h"
-#include "../cpu.h"
-
-/* Table of function pointers through which the codec reaches its DSP
-   kernels (see the dsp_* call macros in this header).  Member order is
-   unchanged. */
-typedef struct
-{
-  /* FPU state hooks; dsp_init() in dsp.c installs a no-op for both. */
-  void (*save_fpu) (void);
-  void (*restore_fpu) (void);
-
-  /* 8x8 residual: DctInputPtr = FiltPtr - ReconPtr. */
-  void (*sub8x8) (unsigned char *FiltPtr, unsigned char *ReconPtr,
-                  ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine,
-                  ogg_uint32_t ReconPixelsPerLine);
-
-  /* 8x8 residual against the constant 128 (intra blocks). */
-  void (*sub8x8_128) (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr,
-                      ogg_uint32_t PixelsPerLine);
-
-  /* 8x8 residual against the average of two references. */
-  void (*sub8x8avg2) (unsigned char *FiltPtr, unsigned char *ReconPtr1,
-                      unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr,
-                      ogg_uint32_t PixelsPerLine,
-                      ogg_uint32_t ReconPixelsPerLine);
-
-  /* The next five members are not assigned by dsp_init() in dsp.c.
-     NOTE(review): presumably installed by dsp_recon_init() and
-     dsp_dct_init() - confirm against those files. */
-  void (*copy8x8) (unsigned char *src, unsigned char *dest,
-                   ogg_uint32_t stride);
-
-  void (*recon_intra8x8) (unsigned char *ReconPtr, ogg_int16_t *ChangePtr,
-                          ogg_uint32_t LineStep);
-
-  void (*recon_inter8x8) (unsigned char *ReconPtr, unsigned char *RefPtr,
-                          ogg_int16_t *ChangePtr, ogg_uint32_t LineStep);
-
-  void (*recon_inter8x8_half) (unsigned char *ReconPtr, unsigned char *RefPtr1,
-                               unsigned char *RefPtr2, ogg_int16_t *ChangePtr,
-                               ogg_uint32_t LineStep);
-
-  void (*fdct_short) (ogg_int16_t *InputData, ogg_int16_t *OutputData);
-
-  /* Larger of the two 4-pixel SADs of one 8-pixel row. */
-  ogg_uint32_t (*row_sad8) (unsigned char *Src1, unsigned char *Src2);
-
-  /* Largest per-column SAD taken over each 4-row half of an 8x8 block. */
-  ogg_uint32_t (*col_sad8x8) (unsigned char *Src1, unsigned char *Src2,
-                              ogg_uint32_t stride);
-
-  /* SAD over an 8x8 block. */
-  ogg_uint32_t (*sad8x8) (unsigned char *ptr1, ogg_uint32_t stride1,
-                          unsigned char *ptr2, ogg_uint32_t stride2);
-
-  /* SAD over an 8x8 block, abandoned once the total exceeds thres. */
-  ogg_uint32_t (*sad8x8_thres) (unsigned char *ptr1, ogg_uint32_t stride1,
-                                unsigned char *ptr2, ogg_uint32_t stride2,
-                                ogg_uint32_t thres);
-
-  /* Thresholded SAD against the average of two references. */
-  ogg_uint32_t (*sad8x8_xy2_thres) (unsigned char *SrcData, ogg_uint32_t SrcStride,
-                                    unsigned char *RefDataPtr1,
-                                    unsigned char *RefDataPtr2, ogg_uint32_t RefStride,
-                                    ogg_uint32_t thres);
-
-  /* 64*sum(x^2) - sum(x)^2 over the pixels of one 8x8 block. */
-  ogg_uint32_t (*intra8x8_err) (unsigned char *DataPtr, ogg_uint32_t Stride);
-
-  /* Same metric over source-minus-reference differences. */
-  ogg_uint32_t (*inter8x8_err) (unsigned char *SrcData, ogg_uint32_t SrcStride,
-                                unsigned char *RefDataPtr, ogg_uint32_t RefStride);
-
-  /* Same metric against the average of two references. */
-  ogg_uint32_t (*inter8x8_err_xy2) (unsigned char *SrcData, ogg_uint32_t SrcStride,
-                                    unsigned char *RefDataPtr1,
-                                    unsigned char *RefDataPtr2, ogg_uint32_t RefStride);
-
-  /* Not assigned by dsp_init() in dsp.c.  NOTE(review): presumably the
-     decode-side loop filter and inverse DCT entry points - confirm. */
-  void (*LoopFilter) (PB_INSTANCE *pbi, int FLimit);
-
-  void (*FilterVert) (unsigned char * PixelPtr,
-                      ogg_int32_t LineLength, ogg_int16_t *BoundingValuePtr);
-
-  void (*IDctSlow) (ogg_int16_t *InputData,
-                    ogg_int16_t *QuantMatrix, ogg_int16_t *OutputData);
-
-  void (*IDct3) (ogg_int16_t *InputData,
-                 ogg_int16_t *QuantMatrix, ogg_int16_t *OutputData);
-
-  void (*IDct10) (ogg_int16_t *InputData,
-                  ogg_int16_t *QuantMatrix, ogg_int16_t *OutputData);
-} DspFunctions;
-
-extern void dsp_dct_init(DspFunctions *funcs, ogg_uint32_t cpu_flags);
-extern void dsp_recon_init (DspFunctions *funcs, ogg_uint32_t cpu_flags);
-extern void dsp_dct_decode_init(DspFunctions *funcs, ogg_uint32_t cpu_flags);
-extern void dsp_idct_init(DspFunctions *funcs, ogg_uint32_t cpu_flags);
-
-void dsp_init(DspFunctions *funcs);
-void dsp_static_init(DspFunctions *funcs);
-#if defined(USE_ASM) && (defined(__i386__) || defined(__x86_64__) || defined(WIN32))
-extern void dsp_mmx_init(DspFunctions *funcs);
-extern void dsp_mmxext_init(DspFunctions *funcs);
-extern void dsp_mmx_fdct_init(DspFunctions *funcs);
-extern void dsp_mmx_recon_init(DspFunctions *funcs);
-extern void dsp_mmx_dct_decode_init(DspFunctions *funcs);
-extern void dsp_mmx_idct_init(DspFunctions *funcs);
-#endif
-
-/* Call-through macros for the DspFunctions table.  The first argument
-   is a DspFunctions object (not a pointer); it is parenthesised in
-   every expansion so that any expression yielding the table can be
-   passed safely. */
-
-#define dsp_save_fpu(funcs) ((funcs).save_fpu ())
-
-#define dsp_restore_fpu(funcs) ((funcs).restore_fpu ())
-
-#define dsp_sub8x8(funcs,a1,a2,a3,a4,a5) ((funcs).sub8x8 (a1,a2,a3,a4,a5))
-
-#define dsp_sub8x8_128(funcs,a1,a2,a3) ((funcs).sub8x8_128 (a1,a2,a3))
-
-#define dsp_sub8x8avg2(funcs,a1,a2,a3,a4,a5,a6) ((funcs).sub8x8avg2 (a1,a2,a3,a4,a5,a6))
-
-#define dsp_copy8x8(funcs,ptr1,ptr2,str1) ((funcs).copy8x8 (ptr1,ptr2,str1))
-
-#define dsp_recon_intra8x8(funcs,ptr1,ptr2,str1) ((funcs).recon_intra8x8 (ptr1,ptr2,str1))
-
-#define dsp_recon_inter8x8(funcs,ptr1,ptr2,ptr3,str1) \
-  ((funcs).recon_inter8x8 (ptr1,ptr2,ptr3,str1))
-
-#define dsp_recon_inter8x8_half(funcs,ptr1,ptr2,ptr3,ptr4,str1) \
-  ((funcs).recon_inter8x8_half (ptr1,ptr2,ptr3,ptr4,str1))
-
-#define dsp_fdct_short(funcs,in,out) ((funcs).fdct_short (in,out))
-
-#define dsp_row_sad8(funcs,ptr1,ptr2) ((funcs).row_sad8 (ptr1,ptr2))
-
-#define dsp_col_sad8x8(funcs,ptr1,ptr2,str1) ((funcs).col_sad8x8 (ptr1,ptr2,str1))
-
-#define dsp_sad8x8(funcs,ptr1,str1,ptr2,str2) ((funcs).sad8x8 (ptr1,str1,ptr2,str2))
-
-#define dsp_sad8x8_thres(funcs,ptr1,str1,ptr2,str2,t) ((funcs).sad8x8_thres (ptr1,str1,ptr2,str2,t))
-
-#define dsp_sad8x8_xy2_thres(funcs,ptr1,str1,ptr2,ptr3,str2,t) \
-  ((funcs).sad8x8_xy2_thres (ptr1,str1,ptr2,ptr3,str2,t))
-
-#define dsp_intra8x8_err(funcs,ptr1,str1) ((funcs).intra8x8_err (ptr1,str1))
-
-#define dsp_inter8x8_err(funcs,ptr1,str1,ptr2,str2) \
-  ((funcs).inter8x8_err (ptr1,str1,ptr2,str2))
-
-#define dsp_inter8x8_err_xy2(funcs,ptr1,str1,ptr2,ptr3,str2) \
-  ((funcs).inter8x8_err_xy2 (ptr1,str1,ptr2,ptr3,str2))
-
-#define dsp_LoopFilter(funcs, ptr1, i) \
-  ((funcs).LoopFilter(ptr1, i))
-
-#define dsp_IDctSlow(funcs, ptr1, ptr2, ptr3) \
-    ((funcs).IDctSlow(ptr1, ptr2, ptr3))
-
-/* dsp_IDct3 and dsp_IDct10 previously expanded to the IDctSlow member,
-   which made the IDct3 and IDct10 table entries unreachable through
-   these macros.  Each now dispatches to its own member.
-   NOTE(review): assumes the table initialisers populate IDct3 and
-   IDct10 - confirm in the IDCT init code. */
-#define dsp_IDct3(funcs, ptr1, ptr2, ptr3) \
-    ((funcs).IDct3(ptr1, ptr2, ptr3))
-
-#define dsp_IDct10(funcs, ptr1, ptr2, ptr3) \
-   ((funcs).IDct10(ptr1, ptr2, ptr3))
-
-#endif /* DSP_H */

+ 0 - 1479
Engine/lib/libtheora/lib/enc/encode.c

@@ -1,1479 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: encode.c 15383 2008-10-10 14:33:46Z xiphmont $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include <string.h>
-#include "codec_internal.h"
-#include "encoder_lookup.h"
-#include "block_inline.h"
-
-/* NOTE(review): PUR/PU/PUL/PL look like single-bit flags, presumably
-   for the up-right, up, up-left and left neighbours; their use is
-   outside this excerpt - confirm. */
-#define PUR 8
-#define PU 4
-#define PUL 2
-#define PL 1
-/* Casts X to ogg_int16_t and shifts right by 15.  The argument is
-   parenthesised so the cast applies to the whole expression passed in,
-   not just to its first operand. */
-#define HIGHBITDUPPED(X) (((ogg_int16_t)(X))  >> 15)
-
-/* Transform and quantize every coded fragment of one colour component,
-   visiting super-blocks in raster order starting at index FirstSB and,
-   inside each super-block, its four quadrants and their four blocks in
-   the order given by the block map.
-
-   For each in-frame fragment whose display_fragments flag is set,
-   TransformQuantizeBlock() is called.  If the flag is still set
-   afterwards the fragment index is appended to pb.CodedBlockList and
-   cpi->MBCodingMode is set from pb.FragCodingMethod.  When FirstSB is 0
-   (the original comment identifies this as the Y plane) each quadrant
-   with at least one surviving block appends cpi->MBCodingMode to
-   cpi->ModeList.
-
-   Returns 0 in all cases: the original "pixels coded" counter was
-   initialised to zero and never updated. */
-static ogg_uint32_t QuadCodeComponent ( CP_INSTANCE *cpi,
-                                        ogg_uint32_t FirstSB,
-                                        ogg_uint32_t SBRows,
-                                        ogg_uint32_t SBCols,
-                                        ogg_uint32_t PixelsPerLine){
-
-  ogg_uint32_t  sb_row, sb_col;       /* Super-block row / column */
-  ogg_uint32_t  sb_index = FirstSB;   /* Running super-block index */
-  ogg_uint32_t  quad, blk;            /* Quadrant and block within it */
-  ogg_int32_t   frag;                 /* Fragment number, <0 if outside */
-  int           quad_coded;
-
-  for ( sb_row=0; sb_row<SBRows; sb_row++ ) {
-    for ( sb_col=0; sb_col<SBCols; sb_col++, sb_index++ ) {
-      /* 'Macro-Block' is a misnomer in the chroma planes; this is
-         really just a Hilbert curve iterator */
-      for ( quad=0; quad<4; quad++ ) {
-
-        /* Skip quadrants that lie outside the frame. */
-        if ( QuadMapToMBTopLeft(cpi->pb.BlockMap,sb_index,quad) < 0 )
-          continue;
-
-        quad_coded = 0;
-
-        for ( blk=0; blk<4; blk++ ) {
-          frag = QuadMapToIndex1( cpi->pb.BlockMap, sb_index, quad, blk );
-
-          /* Block outside the frame. */
-          if ( frag < 0 )
-            continue;
-
-          /* Block not selected for coding. */
-          if ( !cpi->pb.display_fragments[frag] )
-            continue;
-
-          TransformQuantizeBlock( cpi, frag, PixelsPerLine );
-
-          /* The transform step may have struck the block off (no MV
-             and no data left after the DCT). */
-          if ( !cpi->pb.display_fragments[frag] )
-            continue;
-
-          /* Append to the linear list of coded block indices. */
-          cpi->pb.CodedBlockList[cpi->pb.CodedBlockIndex] = frag;
-          cpi->pb.CodedBlockIndex++;
-
-          quad_coded = 1;
-          cpi->MBCodingMode = cpi->pb.FragCodingMethod[frag];
-        }
-
-        /* Only the first component (FirstSB == 0) feeds the mode list. */
-        if ( quad_coded && (FirstSB == 0) ){
-          cpi->ModeList[cpi->ModeListCount] = cpi->MBCodingMode;
-          cpi->ModeListCount++;
-        }
-      }
-    }
-  }
-
-  return 0;
-}
-
-/* Write the optimised DC token list to cpi->oggbuffer.
-   Step 1: for each plane class (index 0 and 1 of OptimisedTokenListPl;
-   the original comments call them Y and UV) total the code lengths of
-   all its tokens under each of the DC_HUFF_CHOICES tables.
-   Step 2: per plane class pick the cheapest table (first one wins on a
-   tie) and write its index with DC_HUFF_CHOICE_BITS bits, class 0
-   first.
-   Step 3: write every token with the table chosen for its class,
-   followed by its extra bits when the token has any, adding the bits
-   written to cpi->FrameBitCount.
-   Finally OptimisedTokenCount is reset to 0. */
-static void EncodeDcTokenList (CP_INSTANCE *cpi) {
-  oggpack_buffer *opb=cpi->oggbuffer;
-
-  ogg_uint32_t  TableBits[2][DC_HUFF_CHOICES];
-  ogg_uint32_t  Choice[2];
-  ogg_uint32_t  Lowest;
-
-  ogg_uint32_t  Tok;
-  ogg_uint32_t  Extra;
-  ogg_uint32_t  Table;
-  ogg_int32_t   n, t, pl;
-
-  memset ( TableBits, 0, sizeof(TableBits) );
-
-  /* Step 1: cost of every table for each plane class. */
-  for ( n = 0; n < cpi->OptimisedTokenCount; n++ ) {
-    Tok = (ogg_uint32_t)cpi->OptimisedTokenList[n];
-    for ( t = 0; t < DC_HUFF_CHOICES; t++ ){
-      TableBits[cpi->OptimisedTokenListPl[n]][t] +=
-        cpi->pb.HuffCodeLengthArray_VP3x[DC_HUFF_OFFSET + t][Tok];
-    }
-  }
-
-  /* Step 2: choose and signal the cheapest table per plane class. */
-  for ( pl = 0; pl < 2; pl++ ) {
-    Lowest = TableBits[pl][0];
-    Choice[pl] = 0;
-    for ( t = 1; t < DC_HUFF_CHOICES; t++ ) {
-      if ( TableBits[pl][t] < Lowest ) {
-        Lowest = TableBits[pl][t];
-        Choice[pl] = t;
-      }
-    }
-    oggpackB_write( opb, Choice[pl], DC_HUFF_CHOICE_BITS );
-  }
-
-  /* Step 3: emit the tokens. */
-  for ( n = 0; n < cpi->OptimisedTokenCount; n++ ) {
-    Tok = (ogg_uint32_t)cpi->OptimisedTokenList[n];
-    Extra = (ogg_uint32_t)cpi->OptimisedTokenListEb[n];
-
-    /* Class 0 uses the first choice, anything else the second. */
-    Table = (ogg_uint32_t)DC_HUFF_OFFSET +
-      (ogg_uint32_t)Choice[ ( cpi->OptimisedTokenListPl[n] == 0 ) ? 0 : 1 ];
-
-    cpi->FrameBitCount += cpi->pb.HuffCodeLengthArray_VP3x[Table][Tok];
-    oggpackB_write( opb, cpi->pb.HuffCodeArray_VP3x[Table][Tok],
-                     (ogg_uint32_t)cpi->
-                     pb.HuffCodeLengthArray_VP3x[Table][Tok] );
-
-    /* Tokens without extra bits are complete at this point. */
-    if ( !( cpi->pb.ExtraBitLengths_VP3x[Tok] > 0 ) )
-      continue;
-
-    cpi->FrameBitCount += cpi->pb.ExtraBitLengths_VP3x[Tok];
-    oggpackB_write( opb, Extra,
-                     (ogg_uint32_t)cpi->pb.ExtraBitLengths_VP3x[Tok] );
-  }
-
-  /* Reset the count of second order optimised tokens */
-  cpi->OptimisedTokenCount = 0;
-}
-
-/* Write the optimised AC token list to cpi->oggbuffer.
-   Each token carries a base Huffman table index in
-   OptimisedTokenListHi; one of AC_HUFF_CHOICES offsets is added to
-   that base.
-   Step 1: for each plane class (index 0 and 1 of OptimisedTokenListPl;
-   the original comments call them Y and UV) total the code lengths of
-   all its tokens under each offset.
-   Step 2: per plane class pick the cheapest offset (first one wins on
-   a tie) and write it with AC_HUFF_CHOICE_BITS bits, class 0 first.
-   Step 3: write every token using base + chosen offset, followed by
-   its extra bits when the token has any, adding the bits written to
-   cpi->FrameBitCount.
-   Finally OptimisedTokenCount is reset to 0. */
-static void EncodeAcTokenList (CP_INSTANCE *cpi) {
-  oggpack_buffer *opb=cpi->oggbuffer;
-
-  ogg_uint32_t  TableBits[2][AC_HUFF_CHOICES];
-  ogg_uint32_t  Choice[2];
-  ogg_uint32_t  Lowest;
-
-  ogg_uint32_t  Tok;
-  ogg_uint32_t  Extra;
-  ogg_uint32_t  Table;
-  ogg_int32_t   n, t, pl;
-
-  memset ( TableBits, 0, sizeof(TableBits) );
-
-  /* Step 1: cost of every offset for each plane class. */
-  for ( n = 0; n < cpi->OptimisedTokenCount; n++ ) {
-    Tok = (ogg_uint32_t)cpi->OptimisedTokenList[n];
-    Table = cpi->OptimisedTokenListHi[n];
-    for ( t = 0; t < AC_HUFF_CHOICES; t++ ) {
-      TableBits[cpi->OptimisedTokenListPl[n]][t] +=
-        cpi->pb.HuffCodeLengthArray_VP3x[Table + t][Tok];
-    }
-  }
-
-  /* Step 2: choose and signal the cheapest offset per plane class. */
-  for ( pl = 0; pl < 2; pl++ ) {
-    Lowest = TableBits[pl][0];
-    Choice[pl] = 0;
-    for ( t = 1; t < AC_HUFF_CHOICES; t++ ) {
-      if ( TableBits[pl][t] < Lowest ) {
-        Lowest = TableBits[pl][t];
-        Choice[pl] = t;
-      }
-    }
-    oggpackB_write( opb, Choice[pl], AC_HUFF_CHOICE_BITS );
-  }
-
-  /* Step 3: emit the tokens. */
-  for ( n = 0; n < cpi->OptimisedTokenCount; n++ ) {
-    Tok = (ogg_uint32_t)cpi->OptimisedTokenList[n];
-    Extra = (ogg_uint32_t)cpi->OptimisedTokenListEb[n];
-
-    /* Base table of this token plus the offset chosen for its class. */
-    Table = (ogg_uint32_t)cpi->OptimisedTokenListHi[n] +
-      Choice[cpi->OptimisedTokenListPl[n]];
-
-    cpi->FrameBitCount += cpi->pb.HuffCodeLengthArray_VP3x[Table][Tok];
-    oggpackB_write( opb, cpi->pb.HuffCodeArray_VP3x[Table][Tok],
-                     (ogg_uint32_t)cpi->
-                     pb.HuffCodeLengthArray_VP3x[Table][Tok] );
-
-    /* Tokens without extra bits are complete at this point. */
-    if ( !( cpi->pb.ExtraBitLengths_VP3x[Tok] > 0 ) )
-      continue;
-
-    cpi->FrameBitCount += cpi->pb.ExtraBitLengths_VP3x[Tok];
-    oggpackB_write( opb, Extra,
-                     (ogg_uint32_t)cpi->pb.ExtraBitLengths_VP3x[Tok] );
-  }
-
-  /* Reset the count of second order optimised tokens */
-  cpi->OptimisedTokenCount = 0;
-}
-
-static void PackModes (CP_INSTANCE *cpi) { /* Pick the cheapest of the
-     mode coding schemes for the cpi->ModeListCount entries of
-     cpi->ModeList, then write the scheme number followed by the coded
-     mode list to cpi->oggbuffer. */
-  ogg_uint32_t    i,j;
-  unsigned char   ModeIndex;
-  const unsigned char  *SchemeList;
-
-  unsigned char   BestModeSchemes[MAX_MODES]; /* frequency rank of each
-                                                 mode (scheme 0) */
-  ogg_int32_t     ModeCount[MAX_MODES];
-  ogg_int32_t     TmpFreq = -1;
-  ogg_int32_t     TmpIndex = -1;
-
-  ogg_uint32_t    BestScheme;
-  ogg_uint32_t    BestSchemeScore;
-  ogg_uint32_t    SchemeScore;
-
-  oggpack_buffer *opb=cpi->oggbuffer;
-
-  /* Build a frequency map for the modes in this frame */
-  memset( ModeCount, 0, MAX_MODES*sizeof(ogg_int32_t) );
-  for ( i = 0; i < cpi->ModeListCount; i++ )
-    ModeCount[cpi->ModeList[i]] ++;
-
-  /* Order the modes from most to least frequent.  Store result as
-     scheme 0.  Each pass picks the mode with the highest remaining
-     count, records its rank j, and retires it by setting its count
-     to -1. */
-  for ( j = 0; j < MAX_MODES; j++ ) {
-    TmpFreq = -1;  /* need to re-initialize for each loop */
-    /* Find the most frequent */
-    for ( i = 0; i < MAX_MODES; i++ ) {
-      /* Is this the best scheme so far ??? */
-      if ( ModeCount[i] > TmpFreq ) {
-        TmpFreq = ModeCount[i];
-        TmpIndex = i;
-      }
-    }
-    /* I don't know if the above loop ever fails to match, but it's
-       better safe than sorry.  Plus this takes care of gcc warning */
-    if ( TmpIndex != -1 ) {
-      ModeCount[TmpIndex] = -1;
-      BestModeSchemes[TmpIndex] = (unsigned char)j;
-    }
-  }
-
-  /* Default/ fallback scheme uses MODE_BITS bits per mode entry */
-  BestScheme = (MODE_METHODS - 1);
-  BestSchemeScore = cpi->ModeListCount * 3; /* literal 3 here; presumably
-                                               equal to MODE_BITS --
-                                               TODO confirm */
-  /* Get a bit score for the available schemes. */
-  for (  j = 0; j < (MODE_METHODS - 1); j++ ) {
-
-    /* Reset the scheme score */
-    if ( j == 0 ){
-      /* Scheme 0 additional cost of sending frequency order
-         (presumably MAX_MODES entries of MODE_BITS each -- confirm) */
-      SchemeScore = 24;
-      SchemeList = BestModeSchemes;
-    } else {
-      SchemeScore = 0;
-      SchemeList = ModeSchemes[j-1];
-    }
-
-    /* Find the total bits to code using each available scheme */
-    for ( i = 0; i < cpi->ModeListCount; i++ )
-      SchemeScore += ModeBitLengths[SchemeList[cpi->ModeList[i]]];
-
-    /* Is this the best scheme so far ???  Strict comparison, so on a
-       tie the earlier candidate (or the fallback) is kept. */
-    if ( SchemeScore < BestSchemeScore ) {
-      BestSchemeScore = SchemeScore;
-      BestScheme = j;
-    }
-  }
-
-  /* Encode the best scheme. */
-  oggpackB_write( opb, BestScheme, (ogg_uint32_t)MODE_METHOD_BITS );
-
-  /* If the chosen scheme is scheme 0 send details of the mode
-     frequency order */
-  if ( BestScheme == 0 ) {
-    for ( j = 0; j < MAX_MODES; j++ ){
-      /* NOTE(review): the remark originally here said the last two
-         entries are implicit, but this loop writes all MAX_MODES
-         entries. */
-      oggpackB_write( opb, BestModeSchemes[j], (ogg_uint32_t)MODE_BITS );
-    }
-    SchemeList = BestModeSchemes;
-  }
-  else {
-    SchemeList = ModeSchemes[BestScheme-1]; /* also evaluated for the
-                                               fallback scheme, where
-                                               SchemeList is not read
-                                               afterwards */
-  }
-
-  /* Are we using one of the alphabet based schemes or the fallback scheme */
-  if ( BestScheme < (MODE_METHODS - 1)) {
-    /* Pack and encode the Mode list */
-    for ( i = 0; i < cpi->ModeListCount; i++) {
-      /* Add the appropriate mode entropy token. */
-      ModeIndex = SchemeList[cpi->ModeList[i]];
-      oggpackB_write( opb, ModeBitPatterns[ModeIndex],
-                      (ogg_uint32_t)ModeBitLengths[ModeIndex] );
-    }
-  }else{
-    /* Fall back to MODE_BITS per entry */
-    for ( i = 0; i < cpi->ModeListCount; i++)
-      /* Write the raw mode value. */
-      oggpackB_write( opb, cpi->ModeList[i], MODE_BITS  );
-  }
-
-}
-
-static void PackMotionVectors (CP_INSTANCE *cpi) { /* Write the
-     cpi->MvListCount vectors of cpi->MVList to cpi->oggbuffer: one bit
-     selecting the code table, then the x and y code of each vector. */
-  ogg_int32_t  i;
-  ogg_uint32_t MethodBits[2] = {0,0}; /* [0]: total bits using MvBits,
-                                         [1]: total at 12 bits per vector */
-  const ogg_uint32_t * MvBitsPtr;
-  const ogg_uint32_t * MvPatternPtr;
-
-  oggpack_buffer *opb=cpi->oggbuffer;
-
-  /* Choose the coding method.  The tables are addressed relative to
-     their MAX_MV_EXTENT element; presumably this lets negative vector
-     components index backwards -- confirm against the table
-     definitions. */
-  MvBitsPtr = &MvBits[MAX_MV_EXTENT];
-  for ( i = 0; i < (ogg_int32_t)cpi->MvListCount; i++ ) {
-    MethodBits[0] += MvBitsPtr[cpi->MVList[i].x];
-    MethodBits[0] += MvBitsPtr[cpi->MVList[i].y];
-    MethodBits[1] += 12; /* Simple six bits per mv component fallback
-                             mechanism */
-  }
-
-  /* Select entropy table: a 0 bit selects MvBits/MvPattern, a 1 bit
-     selects MvBits2/MvPattern2 (also chosen when the costs tie). */
-  if ( MethodBits[0] < MethodBits[1] ) {
-    oggpackB_write( opb, 0, 1 );
-    MvBitsPtr = &MvBits[MAX_MV_EXTENT];
-    MvPatternPtr = &MvPattern[MAX_MV_EXTENT];
-  }else{
-    oggpackB_write( opb, 1, 1 );
-    MvBitsPtr = &MvBits2[MAX_MV_EXTENT];
-    MvPatternPtr = &MvPattern2[MAX_MV_EXTENT];
-  }
-
-  /* Pack and encode the motion vectors, x component first */
-  for ( i = 0; i < (ogg_int32_t)cpi->MvListCount; i++ ) {
-    oggpackB_write( opb, MvPatternPtr[cpi->MVList[i].x],
-                     (ogg_uint32_t)MvBitsPtr[cpi->MVList[i].x] );
-    oggpackB_write( opb, MvPatternPtr[cpi->MVList[i].y],
-                     (ogg_uint32_t)MvBitsPtr[cpi->MVList[i].y] );
-  }
-
-}
-
-static void PackEOBRun( CP_INSTANCE *cpi) { /* Append one entry for the
-     pending EOB run (cpi->RunLength) to the optimised token list and
-     reset the run.  Does nothing when no run is pending. */
-  if(cpi->RunLength == 0)
-        return;
-
-  /* Note the appropriate EOB or EOB run token and any extra bits in
-     the optimised token list.  Use the huffman index associated with
-     the first token in the run */
-
-  /* Mark out which plane the block belonged to */
-  cpi->OptimisedTokenListPl[cpi->OptimisedTokenCount] =
-    (unsigned char)cpi->RunPlaneIndex;
-
-  /* Note the huffman index to be used */
-  cpi->OptimisedTokenListHi[cpi->OptimisedTokenCount] =
-    (unsigned char)cpi->RunHuffIndex;
-
-  /* Runs of 1, 2 or 3 get a dedicated token; no extra-bits entry is
-     written for them. */
-  if ( cpi->RunLength <= 3 ) {
-    if ( cpi->RunLength == 1 ) {
-      cpi->OptimisedTokenList[cpi->OptimisedTokenCount] = DCT_EOB_TOKEN;
-    } else if ( cpi->RunLength == 2 ) {
-      cpi->OptimisedTokenList[cpi->OptimisedTokenCount] = DCT_EOB_PAIR_TOKEN;
-    } else {
-      cpi->OptimisedTokenList[cpi->OptimisedTokenCount] = DCT_EOB_TRIPLE_TOKEN;
-    }
-
-    cpi->RunLength = 0;
-
-  } else {
-
-    /* Choose a token appropriate to the run length.  The extra bits
-       hold the run length minus the bracket's base (4, 8 or 16); the
-       last bracket stores the run length itself.  A run of 4096 or
-       more matches no bracket, so no token is stored, yet the count
-       below still advances; PackToken flushes runs at 4095. */
-    if ( cpi->RunLength < 8 ) {
-      cpi->OptimisedTokenList[cpi->OptimisedTokenCount] =
-        DCT_REPEAT_RUN_TOKEN;
-      cpi->OptimisedTokenListEb[cpi->OptimisedTokenCount] =
-        cpi->RunLength - 4;
-      cpi->RunLength = 0;
-    } else if ( cpi->RunLength < 16 ) {
-      cpi->OptimisedTokenList[cpi->OptimisedTokenCount] =
-        DCT_REPEAT_RUN2_TOKEN;
-      cpi->OptimisedTokenListEb[cpi->OptimisedTokenCount] =
-        cpi->RunLength - 8;
-      cpi->RunLength = 0;
-    } else if ( cpi->RunLength < 32 ) {
-      cpi->OptimisedTokenList[cpi->OptimisedTokenCount] =
-        DCT_REPEAT_RUN3_TOKEN;
-      cpi->OptimisedTokenListEb[cpi->OptimisedTokenCount] =
-        cpi->RunLength - 16;
-      cpi->RunLength = 0;
-    } else if ( cpi->RunLength < 4096) {
-      cpi->OptimisedTokenList[cpi->OptimisedTokenCount] =
-        DCT_REPEAT_RUN4_TOKEN;
-      cpi->OptimisedTokenListEb[cpi->OptimisedTokenCount] =
-        cpi->RunLength;
-      cpi->RunLength = 0;
-    }
-
-  }
-
-  cpi->OptimisedTokenCount++;
-  /* Reset run EOB length (already zeroed on every branch above that
-     stored a token) */
-  cpi->RunLength = 0;
-}
-
-static void PackToken ( CP_INSTANCE *cpi, ogg_int32_t FragmentNumber,
-                 ogg_uint32_t HuffIndex ) { /* Consume the next token of
-     fragment FragmentNumber and either add it to the pending EOB run or
-     append it, tagged with HuffIndex, to the optimised token list. */
-  ogg_uint32_t Token =
-    cpi->pb.TokenList[FragmentNumber][cpi->FragTokens[FragmentNumber]];
-  /* The entry after the token; only meaningful when the token has
-     extra bits, but it is read unconditionally. */
-  ogg_uint32_t ExtraBitsToken =
-    cpi->pb.TokenList[FragmentNumber][cpi->FragTokens[FragmentNumber] + 1];
-  ogg_uint32_t OneOrTwo;
-  ogg_uint32_t OneOrZero;
-
-  /* Update the record of what coefficient we have got up to for this
-     block and unpack the encoded token back into the quantised data
-     array. */
-  if ( Token == DCT_EOB_TOKEN )
-    cpi->pb.FragCoeffs[FragmentNumber] = BLOCK_SIZE;
-  else
-    ExpandToken( cpi->pb.QFragData[FragmentNumber],
-                 &cpi->pb.FragCoeffs[FragmentNumber],
-                 Token, ExtraBitsToken );
-
-  /* Update record of tokens coded and where we are in this fragment. */
-  /* Is there an extra bits token */
-  OneOrTwo= 1 + ( cpi->pb.ExtraBitLengths_VP3x[Token] > 0 );
-  /* Advance to the next real token. */
-  cpi->FragTokens[FragmentNumber] += (unsigned char)OneOrTwo;
-
-  /* Update the counts of tokens coded */
-  cpi->TokensCoded += OneOrTwo;
-  cpi->TokensToBeCoded -= OneOrTwo;
-
-  /* 1 when the fragment lies in the Y plane, 0 otherwise; the plane
-     index stored below is therefore 0 for Y and 1 for the rest. */
-  OneOrZero = ( FragmentNumber < (ogg_int32_t)cpi->pb.YPlaneFragments );
-
-  if ( Token == DCT_EOB_TOKEN ) {
-    /* The first EOB of a run fixes the run's huffman and plane index */
-    if ( cpi->RunLength == 0 ) {
-      cpi->RunHuffIndex = HuffIndex;
-      cpi->RunPlaneIndex = 1 -  OneOrZero;
-    }
-    cpi->RunLength++;
-
-    /* The run has reached its maximum length, so emit an eob run token */
-    if ( cpi->RunLength == 4095 ) PackEOBRun(cpi);
-
-  }else{
-
-    /* If we have an EOB run then code it up first */
-    if ( cpi->RunLength > 0 ) PackEOBRun( cpi);
-
-    /* Mark out which plane the block belonged to */
-    cpi->OptimisedTokenListPl[cpi->OptimisedTokenCount] =
-      (unsigned char)(1 - OneOrZero);
-
-    /* Note the token, extra bits and huffman table in the optimised
-       token list */
-    cpi->OptimisedTokenList[cpi->OptimisedTokenCount] =
-      (unsigned char)Token;
-    cpi->OptimisedTokenListEb[cpi->OptimisedTokenCount] =
-      ExtraBitsToken;
-    cpi->OptimisedTokenListHi[cpi->OptimisedTokenCount] =
-      (unsigned char)HuffIndex;
-
-    cpi->OptimisedTokenCount++;
-  }
-}
-
-static ogg_uint32_t GetBlockReconErrorSlow( CP_INSTANCE *cpi,
-                                     ogg_int32_t BlockIndex ) { /* Return
-     the dsp_sad8x8 result for block BlockIndex, comparing the source
-     frame (cpi->ConvDestBuffer) with cpi->pb.LastFrameRecon.  Judging
-     by its name dsp_sad8x8 is an 8x8 sum of absolute differences --
-     confirm against the dsp module. */
-  ogg_uint32_t  ErrorVal;
-
-  unsigned char * SrcDataPtr =
-    &cpi->ConvDestBuffer[cpi->pb.pixel_index_table[BlockIndex]];
-  unsigned char * RecDataPtr =
-    &cpi->pb.LastFrameRecon[cpi->pb.recon_pixel_index_table[BlockIndex]];
-  ogg_int32_t   SrcStride;
-  ogg_int32_t   RecStride;
-
-  /* Is the block a Y block or a UV block.  UV blocks use half the
-     frame width as source stride and the UV reconstruction stride. */
-  if ( BlockIndex < (ogg_int32_t)cpi->pb.YPlaneFragments ) {
-    SrcStride = cpi->pb.info.width;
-    RecStride = cpi->pb.YStride;
-  }else{
-    SrcStride = cpi->pb.info.width >> 1;
-    RecStride = cpi->pb.UVStride;
-  }
-
-  ErrorVal = dsp_sad8x8 (cpi->dsp, SrcDataPtr, SrcStride, RecDataPtr, RecStride);
-
-  return ErrorVal;
-}
-
-static void PackCodedVideo (CP_INSTANCE *cpi) { /* Write one frame's
-     coded data to cpi->oggbuffer: for non-key frames the fragment map,
-     modes and motion vectors, then for every frame the DC token list
-     followed by the AC token list. */
-  ogg_int32_t i;
-  ogg_int32_t EncodedCoeffs = 1;
-  ogg_int32_t FragIndex;
-  ogg_uint32_t HuffIndex; /* Index to group of tables used to code a token */
-
-  /* Reset the count of second order optimised tokens */
-  cpi->OptimisedTokenCount = 0;
-
-  cpi->TokensToBeCoded = cpi->TotTokenCount;
-  cpi->TokensCoded = 0;
-
-  /* Calculate the bit rate at which this frame should be capped. */
-  cpi->MaxBitTarget = (ogg_uint32_t)((double)(cpi->ThisFrameTargetBytes * 8) *
-                                     cpi->BitRateCapFactor);
-
-  /* Blank the various fragment data structures before we start. */
-  memset(cpi->pb.FragCoeffs, 0, cpi->pb.UnitFragments);
-  memset(cpi->FragTokens, 0, cpi->pb.UnitFragments);
-
-  /* Clear down the QFragData structure for all coded blocks. */
-  ClearDownQFragData(&cpi->pb);
-
-  /* The tree is not needed (implicit) for key frames */
-  if ( cpi->pb.FrameType != KEY_FRAME ){
-    /* Pack the quad tree fragment mapping. */
-    PackAndWriteDFArray( cpi );
-  }
-
-  /* Note the number of bits used to code the tree itself.  This value
-     is overwritten below, after the modes and motion vectors. */
-  cpi->FrameBitCount = oggpackB_bytes(cpi->oggbuffer) << 3;
-
-  /* Mode and MV data not needed for key frames. */
-  if ( cpi->pb.FrameType != KEY_FRAME ){
-    /* Pack and code the mode list. */
-    PackModes(cpi);
-    /* Pack the motion vectors */
-    PackMotionVectors (cpi);
-  }
-
-  cpi->FrameBitCount = oggpackB_bytes(cpi->oggbuffer) << 3;
-
-  /* Optimise the DC tokens: one token per coded block, all using the
-     DC_HUFF_OFFSET index. */
-  for ( i = 0; i < cpi->pb.CodedBlockIndex; i++ ) {
-    /* Get the linear index for the current fragment. */
-    FragIndex = cpi->pb.CodedBlockList[i];
-
-    cpi->pb.FragCoefEOB[FragIndex]=(unsigned char)EncodedCoeffs;
-    PackToken(cpi, FragIndex, DC_HUFF_OFFSET );
-
-  }
-
-  /* Pack any outstanding EOB tokens */
-  PackEOBRun(cpi);
-
-  /* Now output the optimised DC token list using the appropriate
-     entropy tables. */
-  EncodeDcTokenList(cpi);
-
-  /* Work out the number of DC bits coded */
-
-  /* Optimise the AC tokens: one pass over the coded blocks for each
-     coefficient position from 1 to 63. */
-  while ( EncodedCoeffs < 64 ) {
-    /* Huffman table adjustment based upon coefficient number. */
-    if ( EncodedCoeffs <= AC_TABLE_2_THRESH )
-      HuffIndex = AC_HUFF_OFFSET;
-    else if ( EncodedCoeffs <= AC_TABLE_3_THRESH )
-      HuffIndex = AC_HUFF_OFFSET + AC_HUFF_CHOICES;
-    else if ( EncodedCoeffs <= AC_TABLE_4_THRESH )
-      HuffIndex = AC_HUFF_OFFSET + (AC_HUFF_CHOICES * 2);
-    else
-      HuffIndex = AC_HUFF_OFFSET + (AC_HUFF_CHOICES * 3);
-
-    /* Repeatedly scan through the list of blocks. */
-    for ( i = 0; i < cpi->pb.CodedBlockIndex; i++ ) {
-      /* Get the linear index for the current fragment. */
-      FragIndex = cpi->pb.CodedBlockList[i];
-
-      /* Should we code a token for this block on this pass: it must
-         have tokens left and must not already be past this
-         coefficient position. */
-      if ( cpi->FragTokens[FragIndex] < cpi->FragTokenCounts[FragIndex]
-           && cpi->pb.FragCoeffs[FragIndex] <= EncodedCoeffs ) {
-        /* Bit pack a token for this block */
-        cpi->pb.FragCoefEOB[FragIndex]=(unsigned char)EncodedCoeffs;
-        PackToken( cpi, FragIndex, HuffIndex );
-      }
-    }
-
-    EncodedCoeffs ++;
-  }
-
-  /* Pack any outstanding EOB tokens */
-  PackEOBRun(cpi);
-
-  /* Now output the optimised AC token list using the appropriate
-     entropy tables. */
-  EncodeAcTokenList(cpi);
-
-}
-
-static ogg_uint32_t QuadCodeDisplayFragments (CP_INSTANCE *cpi) { /*
-     Code one frame: run QuadCodeComponent on the Y, U and V planes,
-     replace each coded fragment's DC value by its prediction residual,
-     tokenize, bit pack, rebuild the reference frames and record the
-     per-block reconstruction error.  Returns the sum of the values
-     returned by QuadCodeComponent and DPCMTokenizeBlock. */
-  ogg_int32_t   i,j;
-  ogg_uint32_t  coded_pixels=0;
-  int           QIndex;
-  int k,m,n;
-
-  /* predictor multiplier up-left, up, up-right,left, shift
-     Entries are packed in the order L, UL, U, UR, with missing entries
-      moved to the end (before the shift parameters).
-     Column 4 is the right-shift amount and column 5 the value added to
-     negative sums before shifting (see their use below). */
-  static const ogg_int16_t pc[16][6]={
-    {0,0,0,0,0,0},
-    {1,0,0,0,0,0},      /* PL */
-    {1,0,0,0,0,0},      /* PUL */
-    {1,0,0,0,0,0},      /* PUL|PL */
-    {1,0,0,0,0,0},      /* PU */
-    {1,1,0,0,1,1},      /* PU|PL */
-    {0,1,0,0,0,0},      /* PU|PUL */
-    {29,-26,29,0,5,31}, /* PU|PUL|PL */
-    {1,0,0,0,0,0},      /* PUR */
-    {75,53,0,0,7,127},  /* PUR|PL */
-    {1,1,0,0,1,1},      /* PUR|PUL */
-    {75,0,53,0,7,127},  /* PUR|PUL|PL */
-    {1,0,0,0,0,0},      /* PUR|PU */
-    {75,0,53,0,7,127},  /* PUR|PU|PL */
-    {3,10,3,0,4,15},    /* PUR|PU|PUL */
-    {29,-26,29,0,5,31}  /* PUR|PU|PUL|PL */
-  };
-
-  /* boundary case bit masks, indexed by WhichCase (bit 0: left column,
-     bit 1: top row, bit 2: right column). */
-  static const int bc_mask[8]={
-    /* normal case no boundary condition */
-    PUR|PU|PUL|PL,
-    /* left column */
-    PUR|PU,
-    /* top row */
-    PL,
-    /* top row, left column */
-    0,
-    /* right column */
-    PU|PUL|PL,
-    /* right and left column */
-    PU,
-    /* top row, right column */
-    PL,
-    /* top row, right and left column */
-    0
-  };
-
-  /* value left value up-left, value up, value up-right, missing
-      values skipped. */
-  int v[4];
-
-  /* fragment number left, up-left, up, up-right */
-  int fn[4];
-
-  /* predictor count. */
-  int pcount;
-
-  /*which predictor constants to use */
-  ogg_int16_t wpc;
-
-  /* last used inter predictor (Raster Order) */
-  ogg_int16_t Last[3];  /* last value used for given frame */
-
-  int FragsAcross=cpi->pb.HFragments;
-  int FragsDown = cpi->pb.VFragments;
-  int FromFragment,ToFragment;
-  ogg_int32_t   FragIndex;
-  int WhichFrame;
-  int WhichCase;
-
-  /* Maps a coding mode to the reference it predicts from: per the
-     entries below, 0 for intra, 1 for the last frame and 2 for the
-     golden frame.  Used to index Last[]. */
-  static const ogg_int16_t Mode2Frame[] = {
-    1,  /* CODE_INTER_NO_MV     0 => Encoded diff from same MB last frame  */
-    0,  /* CODE_INTRA           1 => DCT Encoded Block */
-    1,  /* CODE_INTER_PLUS_MV   2 => Encoded diff from included MV MB last frame */
-    1,  /* CODE_INTER_LAST_MV   3 => Encoded diff from MRU MV MB last frame */
-    1,  /* CODE_INTER_PRIOR_MV  4 => NOTE(review): this text originally
-           repeated the FOUR_MV description; presumably the prior-last MV,
-           last frame -- confirm */
-    2,  /* CODE_USING_GOLDEN    5 => Encoded diff from same MB golden frame */
-    2,  /* CODE_GOLDEN_MV       6 => Encoded diff from included MV MB golden frame */
-    1   /* CODE_INTER_FOUR_MV   7 => Encoded diff from included 4 separate MV blocks */
-  };
-
-  ogg_int16_t PredictedDC;
-
-  /* Initialise the coded block indices variables. These allow
-     subsequent linear access to the quad tree ordered list of coded
-     blocks */
-  cpi->pb.CodedBlockIndex = 0;
-
-  /* Set the inter/intra decision control variables.
-     NOTE(review): QIndex is computed here but not read again in this
-     function. */
-  QIndex = Q_TABLE_SIZE - 1;
-  while ( QIndex >= 0 ) {
-    if ( (QIndex == 0) ||
-         ( cpi->pb.QThreshTable[QIndex] >= cpi->pb.ThisFrameQualityValue) )
-      break;
-    QIndex --;
-  }
-
-
-  /* Encode and tokenise the Y, U and V components */
-  coded_pixels = QuadCodeComponent(cpi, 0, cpi->pb.YSBRows, cpi->pb.YSBCols,
-                                   cpi->pb.info.width );
-  coded_pixels += QuadCodeComponent(cpi, cpi->pb.YSuperBlocks,
-                                    cpi->pb.UVSBRows,
-                                    cpi->pb.UVSBCols,
-                                    cpi->pb.info.width>>1 );
-  coded_pixels += QuadCodeComponent(cpi,
-                                    cpi->pb.YSuperBlocks+cpi->pb.UVSuperBlocks,
-                                    cpi->pb.UVSBRows, cpi->pb.UVSBCols,
-                                    cpi->pb.info.width>>1 );
-
-  /* for y,u,v */
-  for ( j = 0; j < 3 ; j++) {
-    /* pick which fragments based on Y, U, V.  ToFragment is assigned
-       in each case but not read by the loops below. */
-    switch(j){
-    case 0: /* y */
-      FromFragment = 0;
-      ToFragment = cpi->pb.YPlaneFragments;
-      FragsAcross = cpi->pb.HFragments;
-      FragsDown = cpi->pb.VFragments;
-      break;
-    case 1: /* u */
-      FromFragment = cpi->pb.YPlaneFragments;
-      ToFragment = cpi->pb.YPlaneFragments + cpi->pb.UVPlaneFragments ;
-      FragsAcross = cpi->pb.HFragments >> 1;
-      FragsDown = cpi->pb.VFragments >> 1;
-      break;
-    /*case 2:  v */
-    default:
-      FromFragment = cpi->pb.YPlaneFragments + cpi->pb.UVPlaneFragments;
-      ToFragment = cpi->pb.YPlaneFragments + (2 * cpi->pb.UVPlaneFragments) ;
-      FragsAcross = cpi->pb.HFragments >> 1;
-      FragsDown = cpi->pb.VFragments >> 1;
-      break;
-    }
-
-    /* initialize our array of last used DC Components */
-    for(k=0;k<3;k++)Last[k]=0;
-    i=FromFragment;
-
-    /* do prediction on all of Y, U or V, in raster order */
-    for ( m = 0 ; m < FragsDown ; m++) {
-      for ( n = 0 ; n < FragsAcross ; n++, i++) {
-        /* Keep the unpredicted DC so later fragments can predict
-           from it */
-        cpi->OriginalDC[i] = cpi->pb.QFragData[i][0];
-
-        /* only do 2d prediction if fragment coded and on non intra or
-           if all fragments are intra */
-        if( cpi->pb.display_fragments[i] ||
-            (cpi->pb.FrameType == KEY_FRAME) ) {
-          /* Type of Fragment */
-
-          WhichFrame = Mode2Frame[cpi->pb.FragCodingMethod[i]];
-
-          /* Check Borderline Cases */
-          WhichCase = (n==0) + ((m==0) << 1) + ((n+1 == FragsAcross) << 2);
-
-          fn[0]=i-1;
-          fn[1]=i-FragsAcross-1;
-          fn[2]=i-FragsAcross;
-          fn[3]=i-FragsAcross+1;
-
-          /* fragment valid for prediction use if coded and it comes
-             from same frame as the one we are predicting.  The
-             bc_mask test comes first, so neighbours excluded by the
-             boundary mask are never looked up through fn[k]. */
-          for(k=pcount=wpc=0; k<4; k++) {
-            int pflag;
-            pflag=1<<k;
-            if((bc_mask[WhichCase]&pflag) &&
-               cpi->pb.display_fragments[fn[k]] &&
-               (Mode2Frame[cpi->pb.FragCodingMethod[fn[k]]] == WhichFrame)){
-              v[pcount]=cpi->OriginalDC[fn[k]];
-              wpc|=pflag;
-              pcount++;
-            }
-          }
-
-          if(wpc==0) {
-
-            /* fall back to the last coded fragment */
-            cpi->pb.QFragData[i][0] -= Last[WhichFrame];
-
-          } else {
-
-            /* Weighted sum of the available neighbours */
-            PredictedDC = pc[wpc][0]*v[0];
-            for(k=1; k<pcount; k++){
-              PredictedDC += pc[wpc][k]*v[k];
-            }
-
-            /* if we need to do a shift */
-            if(pc[wpc][4] != 0 ) {
-
-              /* If negative add in the negative correction factor */
-              PredictedDC += (HIGHBITDUPPED(PredictedDC) & pc[wpc][5]);
-              /* Shift in lieu of a divide */
-              PredictedDC >>= pc[wpc][4];
-
-            }
-
-            /* check for outranging on the two predictors that can
-               outrange; with PL, PUL and PU all present v[0], v[1]
-               and v[2] hold left, up-left and up */
-            if((wpc&(PU|PUL|PL)) == (PU|PUL|PL)){
-              if( abs(PredictedDC - v[2]) > 128) {
-                PredictedDC = v[2];
-              } else if( abs(PredictedDC - v[0]) > 128) {
-                PredictedDC = v[0];
-              } else if( abs(PredictedDC - v[1]) > 128) {
-                PredictedDC = v[1];
-              }
-            }
-
-            cpi->pb.QFragData[i][0] -= PredictedDC;
-          }
-
-          /* Save the last fragment coded for whatever frame we are
-             predicting from */
-
-          Last[WhichFrame] = cpi->OriginalDC[i];
-
-        }
-      }
-    }
-  }
-
-  /* Pack DC tokens and adjust the ones we couldn't predict 2d */
-  for ( i = 0; i < cpi->pb.CodedBlockIndex; i++ ) {
-    /* Get the linear index for the current coded fragment. */
-    FragIndex = cpi->pb.CodedBlockList[i];
-    coded_pixels += DPCMTokenizeBlock ( cpi, FragIndex);
-
-  }
-
-  /* Bit pack the video data */
-  PackCodedVideo(cpi);
-
-  /* End the bit packing run. */
-  /* EndAddBitsToBuffer(cpi); */
-
-  /* Reconstruct the reference frames */
-  ReconRefFrames(&cpi->pb);
-
-  UpdateFragQIndex(&cpi->pb);
-
-  /* Measure the inter reconstruction error for all the blocks that
-     were coded */
-  /* for use as part of the recovery monitoring process in subsequent frames. */
-  for ( i = 0; i < cpi->pb.CodedBlockIndex; i++ ) {
-    cpi->LastCodedErrorScore[ cpi->pb.CodedBlockList[i] ] =
-      GetBlockReconErrorSlow( cpi, cpi->pb.CodedBlockList[i] );
-
-  }
-
-  /* Return total number of coded pixels */
-  return coded_pixels;
-}
-
-ogg_uint32_t EncodeData(CP_INSTANCE *cpi){ /* Reset the per-frame
-     counters, then code the frame with QuadCodeDisplayFragments between
-     dsp_save_fpu and dsp_restore_fpu.  Returns the value reported by
-     QuadCodeDisplayFragments. */
-    ogg_uint32_t coded_pixels = 0;
-
-    /* Zero the count of tokens so far this frame. */
-    cpi->TotTokenCount = 0;
-
-    /* Zero the mode list index (only ModeListCount is reset here). */
-    cpi->ModeListCount = 0;
-
-    /* Zero Decoder EOB run count */
-    cpi->pb.EOB_Run = 0;
-
-    dsp_save_fpu (cpi->dsp);
-
-    /* Encode any fragments coded using DCT. */
-    coded_pixels += QuadCodeDisplayFragments (cpi);
-
-    dsp_restore_fpu (cpi->dsp);
-
-    return coded_pixels;
-
-}
-
-ogg_uint32_t PickIntra( CP_INSTANCE *cpi,
-                        ogg_uint32_t SBRows,
-                        ogg_uint32_t SBCols){ /* Mark every in-frame
-     macro block of the SBRows x SBCols super-block grid, together with
-     its matching U and V fragments, as CODE_INTRA.  Always returns 0. */
-
-  ogg_int32_t   FragIndex;  /* Fragment number */
-  ogg_uint32_t  MB, B;      /* Macro-Block, Block indices */
-  ogg_uint32_t  SBrow;      /* Super-Block row number */
-  ogg_uint32_t  SBcol;      /* Super-Block column number */
-  ogg_uint32_t  SB=0;       /* Super-Block index, initialised to first of
-                               this component */
-  ogg_uint32_t UVRow;
-  ogg_uint32_t UVColumn;
-  ogg_uint32_t UVFragOffset;
-
-  /* Visit every super-block in raster order */
-  for ( SBrow=0; SBrow<SBRows; SBrow++ ) {
-    for ( SBcol=0; SBcol<SBCols; SBcol++ ) {
-      /* Check its four Macro-Blocks */
-      for ( MB=0; MB<4; MB++ ) {
-        /* There may be MB's lying out of frame which must be
-           ignored. For these MB's Top left block will have a negative
-           Fragment Index. */
-        if ( QuadMapToMBTopLeft(cpi->pb.BlockMap,SB,MB) >= 0 ) {
-
-          cpi->MBCodingMode = CODE_INTRA;
-
-          /* Set the mode on each of the four blocks.  The index
-             returned by QuadMapToIndex1 is used without a sign check;
-             presumably all four blocks of an in-frame MB are in
-             frame -- confirm. */
-          for ( B=0; B<4; B++ ) {
-            FragIndex = QuadMapToIndex1( cpi->pb.BlockMap, SB, MB, B );
-            cpi->pb.FragCodingMethod[FragIndex] = cpi->MBCodingMode;
-          }
-
-          /* Matching fragments in the U and V planes, derived from
-             the FragIndex left by the last block (B == 3) above */
-          UVRow = (FragIndex / (cpi->pb.HFragments * 2));
-          UVColumn = (FragIndex % cpi->pb.HFragments) / 2;
-          UVFragOffset = (UVRow * (cpi->pb.HFragments / 2)) + UVColumn;
-
-          cpi->pb.FragCodingMethod[cpi->pb.YPlaneFragments + UVFragOffset] =
-            cpi->MBCodingMode;
-          cpi->pb.FragCodingMethod[cpi->pb.YPlaneFragments +
-                                  cpi->pb.UVPlaneFragments + UVFragOffset] =
-            cpi->MBCodingMode;
-        }
-      }
-
-      /* Next Super-Block */
-      SB++;
-    }
-  }
-  return 0;
-}
-
-static void AddMotionVector(CP_INSTANCE *cpi,
-                     MOTION_VECTOR *ThisMotionVector) { /* Append a copy
-     of *ThisMotionVector to cpi->MVList and advance cpi->MvListCount.
-     No capacity check is made here. */
-  cpi->MVList[cpi->MvListCount].x = ThisMotionVector->x;
-  cpi->MVList[cpi->MvListCount].y = ThisMotionVector->y;
-  cpi->MvListCount++;
-}
-
-static void SetFragMotionVectorAndMode(CP_INSTANCE *cpi,
-                                ogg_int32_t FragIndex,
-                                MOTION_VECTOR *ThisMotionVector){ /* Store
-     *ThisMotionVector and the current cpi->MBCodingMode for fragment
-     FragIndex. */
-  /* Note the coding mode and vector for each block */
-  cpi->pb.FragMVect[FragIndex].x = ThisMotionVector->x;
-  cpi->pb.FragMVect[FragIndex].y = ThisMotionVector->y;
-  cpi->pb.FragCodingMethod[FragIndex] = cpi->MBCodingMode;
-}
-
-static void SetMBMotionVectorsAndMode(CP_INSTANCE *cpi,
-                               ogg_int32_t YFragIndex,
-                               ogg_int32_t UFragIndex,
-                               ogg_int32_t VFragIndex,
-                               MOTION_VECTOR *ThisMotionVector){ /* Apply
-     one vector and the current cpi->MBCodingMode to six fragments: the
-     2x2 group of Y fragments whose first member is YFragIndex, plus
-     UFragIndex and VFragIndex. */
-  SetFragMotionVectorAndMode(cpi, YFragIndex, ThisMotionVector);
-  SetFragMotionVectorAndMode(cpi, YFragIndex + 1, ThisMotionVector);
-  /* Adding cpi->pb.HFragments moves one fragment row further on */
-  SetFragMotionVectorAndMode(cpi, YFragIndex + cpi->pb.HFragments,
-                             ThisMotionVector);
-  SetFragMotionVectorAndMode(cpi, YFragIndex + cpi->pb.HFragments + 1,
-                             ThisMotionVector);
-  SetFragMotionVectorAndMode(cpi, UFragIndex, ThisMotionVector);
-  SetFragMotionVectorAndMode(cpi, VFragIndex, ThisMotionVector);
-}
-
-ogg_uint32_t PickModes(CP_INSTANCE *cpi,
-                       ogg_uint32_t SBRows, ogg_uint32_t SBCols,
-                       ogg_uint32_t PixelsPerLine,
-                       ogg_uint32_t *InterError, ogg_uint32_t *IntraError) {
-  ogg_int32_t   YFragIndex;
-  ogg_int32_t   UFragIndex;
-  ogg_int32_t   VFragIndex;
-  ogg_uint32_t  MB, B;      /* Macro-Block, Block indices */
-  ogg_uint32_t  SBrow;      /* Super-Block row number */
-  ogg_uint32_t  SBcol;      /* Super-Block row number */
-  ogg_uint32_t  SB=0;       /* Super-Block index, initialised to first
-                               of this component */
-
-  ogg_uint32_t  MBIntraError;           /* Intra error for macro block */
-  ogg_uint32_t  MBGFError;              /* Golden frame macro block error */
-  ogg_uint32_t  MBGF_MVError;           /* Golden frame plus MV error */
-  ogg_uint32_t  LastMBGF_MVError;       /* Golden frame error with
-                                           last used GF motion
-                                           vector. */
-  ogg_uint32_t  MBInterError;           /* Inter no MV macro block error */
-  ogg_uint32_t  MBLastInterError;       /* Inter with last used MV */
-  ogg_uint32_t  MBPriorLastInterError;  /* Inter with prior last MV */
-  ogg_uint32_t  MBInterMVError;         /* Inter MV macro block error */
-  ogg_uint32_t  MBInterMVExError;       /* Inter MV (exhaustive
-                                           search) macro block error */
-  ogg_uint32_t  MBInterFOURMVError;     /* Inter MV error when using 4
-                                           motion vectors per macro
-                                           block */
-  ogg_uint32_t  BestError;              /* Best error so far. */
-
-  MOTION_VECTOR FourMVect[6];     /* storage for last used vectors (one
-                                     entry for each block in MB) */
-  MOTION_VECTOR LastInterMVect;   /* storage for last used Inter frame
-                                     MB motion vector */
-  MOTION_VECTOR PriorLastInterMVect;  /* storage for prior last used
-                                         Inter frame MB motion vector */
-  MOTION_VECTOR TmpMVect;         /* Temporary MV storage */
-  MOTION_VECTOR LastGFMVect;      /* storage for last used Golden
-                                     Frame MB motion vector */
-  MOTION_VECTOR InterMVect;       /* storage for motion vector */
-  MOTION_VECTOR InterMVectEx;     /* storage for motion vector result
-                                     from exhaustive search */
-  MOTION_VECTOR GFMVect;          /* storage for motion vector */
-  MOTION_VECTOR ZeroVect;
-
-  ogg_uint32_t UVRow;
-  ogg_uint32_t UVColumn;
-  ogg_uint32_t UVFragOffset;
-
-  int          MBCodedFlag;
-  unsigned char QIndex;
-
-  /* initialize error scores */
-  *InterError = 0;
-  *IntraError = 0;
-
-  /* clear down the default motion vector. */
-  cpi->MvListCount = 0;
-  FourMVect[0].x = 0;
-  FourMVect[0].y = 0;
-  FourMVect[1].x = 0;
-  FourMVect[1].y = 0;
-  FourMVect[2].x = 0;
-  FourMVect[2].y = 0;
-  FourMVect[3].x = 0;
-  FourMVect[3].y = 0;
-  FourMVect[4].x = 0;
-  FourMVect[4].y = 0;
-  FourMVect[5].x = 0;
-  FourMVect[5].y = 0;
-  LastInterMVect.x = 0;
-  LastInterMVect.y = 0;
-  PriorLastInterMVect.x = 0;
-  PriorLastInterMVect.y = 0;
-  LastGFMVect.x = 0;
-  LastGFMVect.y = 0;
-  InterMVect.x = 0;
-  InterMVect.y = 0;
-  GFMVect.x = 0;
-  GFMVect.y = 0;
-
-  ZeroVect.x = 0;
-  ZeroVect.y = 0;
-
-  QIndex = (unsigned char)cpi->pb.FrameQIndex;
-
-
-  /* change the quatization matrix to the one at best Q to compute the
-     new error score */
-  cpi->MinImprovementForNewMV = (MvThreshTable[QIndex] << 12);
-  cpi->InterTripOutThresh = (5000<<12);
-  cpi->MVChangeFactor = MVChangeFactorTable[QIndex]; /* 0.9 */
-
-  if ( cpi->pb.info.quick_p ) {
-    cpi->ExhaustiveSearchThresh = (1000<<12);
-    cpi->FourMVThreshold = (2500<<12);
-  } else {
-    cpi->ExhaustiveSearchThresh = (250<<12);
-    cpi->FourMVThreshold = (500<<12);
-  }
-  cpi->MinImprovementForFourMV = cpi->MinImprovementForNewMV * 4;
-
-  if(cpi->MinImprovementForFourMV < (40<<12))
-    cpi->MinImprovementForFourMV = (40<<12);
-
-  cpi->FourMvChangeFactor = 8; /* cpi->MVChangeFactor - 0.05;  */
-
-  /* decide what block type and motion vectors to use on all of the frames */
-  for ( SBrow=0; SBrow<SBRows; SBrow++ ) {
-    for ( SBcol=0; SBcol<SBCols; SBcol++ ) {
-      /* Check its four Macro-Blocks */
-      for ( MB=0; MB<4; MB++ ) {
-        /* There may be MB's lying out of frame which must be
-           ignored. For these MB's Top left block will have a negative
-           Fragment Index. */
-        if ( QuadMapToMBTopLeft(cpi->pb.BlockMap,SB,MB) < 0 ) continue;
-
-        /* Is the current macro block coded (in part or in whole) */
-        MBCodedFlag = 0;
-        for ( B=0; B<4; B++ ) {
-          YFragIndex = QuadMapToIndex1( cpi->pb.BlockMap, SB, MB, B );
-
-          /* Does Block lie in frame: */
-          if ( YFragIndex >= 0 ) {
-            /* In Frame: Is it coded: */
-            if ( cpi->pb.display_fragments[YFragIndex] ) {
-              MBCodedFlag = 1;
-              break;
-            }
-          } else
-            MBCodedFlag = 0;
-        }
-
-        /* This one isn't coded go to the next one */
-        if(!MBCodedFlag) continue;
-
-        /* Calculate U and V FragIndex from YFragIndex */
-        YFragIndex = QuadMapToMBTopLeft(cpi->pb.BlockMap, SB,MB);
-        UVRow = (YFragIndex / (cpi->pb.HFragments * 2));
-        UVColumn = (YFragIndex % cpi->pb.HFragments) / 2;
-        UVFragOffset = (UVRow * (cpi->pb.HFragments / 2)) + UVColumn;
-        UFragIndex = cpi->pb.YPlaneFragments + UVFragOffset;
-        VFragIndex = cpi->pb.YPlaneFragments + cpi->pb.UVPlaneFragments +
-          UVFragOffset;
-
-
-        /**************************************************************
-         Find the block choice with the lowest error
-
-         NOTE THAT if U or V is coded but no Y from a macro block then
-         the mode will be CODE_INTER_NO_MV as this is the default
-         state to which the mode data structure is initialised in
-         encoder and decoder at the start of each frame. */
-
-        BestError = HUGE_ERROR;
-
-
-        /* Look at the intra coding error. */
-        MBIntraError = GetMBIntraError( cpi, YFragIndex, PixelsPerLine );
-        BestError = (BestError > MBIntraError) ? MBIntraError : BestError;
-
-        /* Get the golden frame error */
-        MBGFError = GetMBInterError( cpi, cpi->ConvDestBuffer,
-                                     cpi->pb.GoldenFrame, YFragIndex,
-                                     0, 0, PixelsPerLine );
-        BestError = (BestError > MBGFError) ? MBGFError : BestError;
-
-        /* Calculate the 0,0 case. */
-        MBInterError = GetMBInterError( cpi, cpi->ConvDestBuffer,
-                                        cpi->pb.LastFrameRecon,
-                                        YFragIndex, 0, 0, PixelsPerLine );
-        BestError = (BestError > MBInterError) ? MBInterError : BestError;
-
-        /* Measure error for last MV */
-        MBLastInterError =  GetMBInterError( cpi, cpi->ConvDestBuffer,
-                                             cpi->pb.LastFrameRecon,
-                                             YFragIndex, LastInterMVect.x,
-                                             LastInterMVect.y, PixelsPerLine );
-        BestError = (BestError > MBLastInterError) ?
-          MBLastInterError : BestError;
-
-        /* Measure error for prior last MV */
-        MBPriorLastInterError =  GetMBInterError( cpi, cpi->ConvDestBuffer,
-                                                  cpi->pb.LastFrameRecon,
-                                                  YFragIndex,
-                                                  PriorLastInterMVect.x,
-                                                  PriorLastInterMVect.y,
-                                                  PixelsPerLine );
-        BestError = (BestError > MBPriorLastInterError) ?
-          MBPriorLastInterError : BestError;
-
-        /* Temporarily force usage of no motionvector blocks */
-        MBInterMVError = HUGE_ERROR;
-        InterMVect.x = 0;  /* Set 0,0 motion vector */
-        InterMVect.y = 0;
-
-        /* If the best error is above the required threshold search
-           for a new inter MV */
-        if ( BestError > cpi->MinImprovementForNewMV && cpi->MotionCompensation) {
-          /* Use a mix of heirachical and exhaustive searches for
-             quick mode. */
-          if ( cpi->pb.info.quick_p ) {
-            MBInterMVError = GetMBMVInterError( cpi, cpi->pb.LastFrameRecon,
-                                                YFragIndex, PixelsPerLine,
-                                                cpi->MVPixelOffsetY,
-                                                &InterMVect );
-
-            /* If we still do not have a good match try an exhaustive
-               MBMV search */
-            if ( (MBInterMVError > cpi->ExhaustiveSearchThresh) &&
-                 (BestError > cpi->ExhaustiveSearchThresh) ) {
-
-              MBInterMVExError =
-                GetMBMVExhaustiveSearch( cpi, cpi->pb.LastFrameRecon,
-                                         YFragIndex, PixelsPerLine,
-                                         &InterMVectEx );
-
-              /* Is the Variance measure for the EX search
-                 better... If so then use it. */
-              if ( MBInterMVExError < MBInterMVError ) {
-                MBInterMVError = MBInterMVExError;
-                InterMVect.x = InterMVectEx.x;
-                InterMVect.y = InterMVectEx.y;
-              }
-            }
-          }else{
-            /* Use an exhaustive search */
-            MBInterMVError =
-              GetMBMVExhaustiveSearch( cpi, cpi->pb.LastFrameRecon,
-                                       YFragIndex, PixelsPerLine,
-                                       &InterMVect );
-          }
-
-
-          /* Is the improvement, if any, good enough to justify a new MV */
-          if ( (16 * MBInterMVError < (BestError * cpi->MVChangeFactor)) &&
-               ((MBInterMVError + cpi->MinImprovementForNewMV) < BestError) ){
-            BestError = MBInterMVError;
-          }
-
-        }
-
-        /* If the best error is still above the required threshold
-           search for a golden frame MV */
-        MBGF_MVError = HUGE_ERROR;
-        GFMVect.x = 0; /* Set 0,0 motion vector */
-        GFMVect.y = 0;
-        if ( BestError > cpi->MinImprovementForNewMV && cpi->MotionCompensation) {
-          /* Do an MV search in the golden reference frame */
-          MBGF_MVError = GetMBMVInterError( cpi, cpi->pb.GoldenFrame,
-                                            YFragIndex, PixelsPerLine,
-                                            cpi->MVPixelOffsetY, &GFMVect );
-
-          /* Measure error for last GFMV */
-          LastMBGF_MVError =  GetMBInterError( cpi, cpi->ConvDestBuffer,
-                                               cpi->pb.GoldenFrame,
-                                               YFragIndex, LastGFMVect.x,
-                                               LastGFMVect.y, PixelsPerLine );
-
-          /* Check against last GF motion vector and reset if the
-             search has thrown a worse result. */
-          if ( LastMBGF_MVError < MBGF_MVError ) {
-            GFMVect.x = LastGFMVect.x;
-            GFMVect.y = LastGFMVect.y;
-            MBGF_MVError = LastMBGF_MVError;
-          }else{
-            LastGFMVect.x = GFMVect.x;
-            LastGFMVect.y = GFMVect.y;
-          }
-
-          /* Is the improvement, if any, good enough to justify a new MV */
-          if ( (16 * MBGF_MVError < (BestError * cpi->MVChangeFactor)) &&
-               ((MBGF_MVError + cpi->MinImprovementForNewMV) < BestError) ) {
-            BestError = MBGF_MVError;
-          }
-        }
-
-        /* Finally... If the best error is still to high then consider
-           the 4MV mode */
-        MBInterFOURMVError = HUGE_ERROR;
-        if ( BestError > cpi->FourMVThreshold && cpi->MotionCompensation) {
-          /* Get the 4MV error. */
-          MBInterFOURMVError =
-            GetFOURMVExhaustiveSearch( cpi, cpi->pb.LastFrameRecon,
-                                       YFragIndex, PixelsPerLine, FourMVect );
-
-          /* If the improvement is great enough then use the four MV mode */
-          if ( ((MBInterFOURMVError + cpi->MinImprovementForFourMV) <
-                BestError) && (16 * MBInterFOURMVError <
-                               (BestError * cpi->FourMvChangeFactor))) {
-            BestError = MBInterFOURMVError;
-          }
-        }
-
-        /********************************************************
-         end finding the best error
-         *******************************************************
-
-         Figure out what to do with the block we chose
-
-         Over-ride and force intra if error high and Intra error similar
-         Now choose a mode based on lowest error (with bias towards no MV) */
-
-        if ( (BestError > cpi->InterTripOutThresh) &&
-             (10 * BestError > MBIntraError * 7 ) ) {
-          cpi->MBCodingMode = CODE_INTRA;
-          SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
-                                    VFragIndex,&ZeroVect);
-        } else if ( BestError == MBInterError ) {
-          cpi->MBCodingMode = CODE_INTER_NO_MV;
-          SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
-                                    VFragIndex,&ZeroVect);
-        } else if ( BestError == MBGFError ) {
-          cpi->MBCodingMode = CODE_USING_GOLDEN;
-          SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
-                                    VFragIndex,&ZeroVect);
-        } else if ( BestError == MBLastInterError ) {
-          cpi->MBCodingMode = CODE_INTER_LAST_MV;
-          SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
-                                    VFragIndex,&LastInterMVect);
-        } else if ( BestError == MBPriorLastInterError ) {
-          cpi->MBCodingMode = CODE_INTER_PRIOR_LAST;
-          SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
-                                    VFragIndex,&PriorLastInterMVect);
-
-          /* Swap the prior and last MV cases over */
-          TmpMVect.x = PriorLastInterMVect.x;
-          TmpMVect.y = PriorLastInterMVect.y;
-          PriorLastInterMVect.x = LastInterMVect.x;
-          PriorLastInterMVect.y = LastInterMVect.y;
-          LastInterMVect.x = TmpMVect.x;
-          LastInterMVect.y = TmpMVect.y;
-
-        } else if ( BestError == MBInterMVError ) {
-
-          cpi->MBCodingMode = CODE_INTER_PLUS_MV;
-          SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
-                                    VFragIndex,&InterMVect);
-
-          /* Update Prior last mv with last mv */
-          PriorLastInterMVect.x = LastInterMVect.x;
-          PriorLastInterMVect.y = LastInterMVect.y;
-
-          /* Note last inter MV for future use */
-          LastInterMVect.x = InterMVect.x;
-          LastInterMVect.y = InterMVect.y;
-
-          AddMotionVector( cpi, &InterMVect);
-
-        } else if ( BestError == MBGF_MVError ) {
-
-          cpi->MBCodingMode = CODE_GOLDEN_MV;
-          SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
-                                    VFragIndex,&GFMVect);
-
-          /* Note last inter GF MV for future use */
-          LastGFMVect.x = GFMVect.x;
-          LastGFMVect.y = GFMVect.y;
-
-          AddMotionVector( cpi, &GFMVect);
-        } else if ( BestError == MBInterFOURMVError ) {
-          cpi->MBCodingMode = CODE_INTER_FOURMV;
-
-          /* Calculate the UV vectors as the average of the Y plane ones. */
-          /* First .x component */
-          FourMVect[4].x = FourMVect[0].x + FourMVect[1].x +
-            FourMVect[2].x + FourMVect[3].x;
-          if ( FourMVect[4].x >= 0 )
-            FourMVect[4].x = (FourMVect[4].x + 2) / 4;
-          else
-            FourMVect[4].x = (FourMVect[4].x - 2) / 4;
-          FourMVect[5].x = FourMVect[4].x;
-
-          /* Then .y component */
-          FourMVect[4].y = FourMVect[0].y + FourMVect[1].y +
-            FourMVect[2].y + FourMVect[3].y;
-          if ( FourMVect[4].y >= 0 )
-            FourMVect[4].y = (FourMVect[4].y + 2) / 4;
-          else
-            FourMVect[4].y = (FourMVect[4].y - 2) / 4;
-          FourMVect[5].y = FourMVect[4].y;
-
-          SetFragMotionVectorAndMode(cpi, YFragIndex, &FourMVect[0]);
-          SetFragMotionVectorAndMode(cpi, YFragIndex + 1, &FourMVect[1]);
-          SetFragMotionVectorAndMode(cpi, YFragIndex + cpi->pb.HFragments,
-                                     &FourMVect[2]);
-          SetFragMotionVectorAndMode(cpi, YFragIndex + cpi->pb.HFragments + 1,
-                                     &FourMVect[3]);
-          SetFragMotionVectorAndMode(cpi, UFragIndex, &FourMVect[4]);
-          SetFragMotionVectorAndMode(cpi, VFragIndex, &FourMVect[5]);
-
-          /* Note the four MVs values for current macro-block. */
-          AddMotionVector( cpi, &FourMVect[0]);
-          AddMotionVector( cpi, &FourMVect[1]);
-          AddMotionVector( cpi, &FourMVect[2]);
-          AddMotionVector( cpi, &FourMVect[3]);
-
-          /* Update Prior last mv with last mv */
-          PriorLastInterMVect.x = LastInterMVect.x;
-          PriorLastInterMVect.y = LastInterMVect.y;
-
-          /* Note last inter MV for future use */
-          LastInterMVect.x = FourMVect[3].x;
-          LastInterMVect.y = FourMVect[3].y;
-
-        } else {
-
-          cpi->MBCodingMode = CODE_INTRA;
-          SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex,
-                                    VFragIndex,&ZeroVect);
-        }
-
-
-        /* setting up mode specific block types
-           *******************************************************/
-
-        *InterError += (BestError>>8);
-        *IntraError += (MBIntraError>>8);
-
-
-      }
-      SB++;
-
-    }
-  }
-
-  /* Return number of pixels coded */
-  return 0;
-}
-
-/* Append the per-frame header to cpi->oggbuffer:
-     1 bit   cpi->pb.FrameType
-     6 bits  position of cpi->pb.ThisFrameQualityValue in
-             cpi->pb.QThreshTable (31 when the value is not in the table)
-     1 bit   0, since only one Q index per frame is supported
-     3 bits  0, written for key frames only (reserved) */
-void WriteFrameHeader( CP_INSTANCE *cpi) {
-  oggpack_buffer *opb=cpi->oggbuffer;
-  ogg_uint32_t qi;
-
-  oggpackB_write( opb, cpi->pb.FrameType, 1 );
-
-  /* Find the table slot that holds the current quality value. */
-  qi = 0;
-  while ( qi < Q_TABLE_SIZE &&
-          cpi->pb.ThisFrameQualityValue != cpi->pb.QThreshTable[qi] ) {
-    qi++;
-  }
-
-  if ( qi < Q_TABLE_SIZE ) {
-    oggpackB_write( opb, qi, 6 );
-  } else {
-    /* Nothing in the table matched: fall back to the fixed index 31. */
-    oggpackB_write( opb, 31, 6 );
-  }
-
-  /* we only support one Q index per frame */
-  oggpackB_write( opb, 0, 1 );
-
-  /* Key frames carry three additional reserved bits, all zero. */
-  if ( cpi->pb.FrameType == KEY_FRAME ) {
-    oggpackB_write( opb, 0, 3 );
-  }
-}
-

+ 0 - 310
Engine/lib/libtheora/lib/enc/encoder_huffman.c

@@ -1,310 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: encoder_huffman.c 13884 2007-09-22 08:38:10Z giles $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include <stdio.h>
-#include "codec_internal.h"
-#include "hufftables.h"
-
-/* Build, in HuffRoot[HIndex], a doubly linked list holding one leaf per
-   token (0 .. MAX_ENTROPY_TOKENS-1), ordered by ascending frequency.
-   Frequencies come from FreqList; a zero count is raised to 1. */
-static void CreateHuffmanList(HUFF_ENTRY ** HuffRoot,
-                              ogg_uint32_t HIndex,
-                              const ogg_uint32_t *FreqList ) {
-  HUFF_ENTRY *head;
-  int token;
-
-  /* Token zero seeds the list. */
-  head = (HUFF_ENTRY *)_ogg_calloc(1,sizeof(*head));
-  HuffRoot[HIndex] = head;
-  head->Previous = NULL;
-  head->Next = NULL;
-  head->ZeroChild = NULL;
-  head->OneChild = NULL;
-  head->Value = 0;
-  head->Frequency = FreqList[0];
-  if ( head->Frequency == 0 )
-    head->Frequency = 1;
-
-  /* Insert the remaining tokens one at a time, keeping the list sorted. */
-  for ( token = 1; token < MAX_ENTROPY_TOKENS; token++ ) {
-    HUFF_ENTRY *node = (HUFF_ENTRY *)_ogg_calloc(1,sizeof(*node));
-    HUFF_ENTRY *cursor;
-
-    node->Value = token;
-    node->Frequency = FreqList[token];
-    node->ZeroChild = NULL;
-    node->OneChild = NULL;
-
-    /* Force min value of 1. This prevents the tree getting too deep. */
-    if ( node->Frequency == 0 )
-      node->Frequency = 1;
-
-    head = HuffRoot[HIndex];
-
-    /* Not larger than the current head: the node becomes the new head. */
-    if ( node->Frequency <= head->Frequency ){
-      node->Previous = NULL;
-      node->Next = head;
-      head->Previous = node;
-      HuffRoot[HIndex] = node;
-      continue;
-    }
-
-    /* Walk forward until an entry that is not smaller, or the tail. */
-    cursor = head;
-    while ( (cursor->Next != NULL) &&
-            (cursor->Frequency < node->Frequency) ){
-      cursor = (HUFF_ENTRY *)cursor->Next;
-    }
-
-    if ( cursor->Frequency < node->Frequency ){
-      /* Every entry is smaller: append after the tail. */
-      node->Next = NULL;
-      node->Previous = cursor;
-      cursor->Next = node;
-    }else{
-      /* Splice in just ahead of the cursor. */
-      node->Next = cursor;
-      node->Previous = cursor->Previous;
-      cursor->Previous->Next = node;
-      cursor->Previous = node;
-    }
-  }
-}
-
-/* Walk the tree below HuffRoot and, for each leaf, store the code bits
-   (CodeValue) and code length (CodeLength) accumulated on the way down
-   into HuffCodeArray / HuffCodeLengthArray at index leaf->Value.
-   A zero child appends a 0 bit, a one child appends a 1 bit. */
-static void CreateCodeArray( HUFF_ENTRY * HuffRoot,
-                      ogg_uint32_t *HuffCodeArray,
-                      unsigned char *HuffCodeLengthArray,
-                      ogg_uint32_t CodeValue,
-                      unsigned char CodeLength ) {
-  unsigned char child_length;
-
-  /* Leaf: record the finished code and stop descending. */
-  if ( ( HuffRoot->ZeroChild == NULL ) && ( HuffRoot->OneChild == NULL ) ){
-    HuffCodeArray[HuffRoot->Value] = CodeValue;
-    HuffCodeLengthArray[HuffRoot->Value] = CodeLength;
-    return;
-  }
-
-  /* Interior node: both children sit one bit deeper. */
-  child_length = (unsigned char)(CodeLength + 1);
-  CreateCodeArray(HuffRoot->ZeroChild, HuffCodeArray, HuffCodeLengthArray,
-                  (CodeValue << 1), child_length);
-  CreateCodeArray(HuffRoot->OneChild, HuffCodeArray, HuffCodeLengthArray,
-                  ((CodeValue << 1) + 1), child_length);
-}
-
-/* Build Huffman table HIndex from the token counts in FreqList.
-   The sorted leaf list is created first, then the two lowest-frequency
-   entries are repeatedly merged under a new parent (Value == -1) that is
-   re-inserted in frequency order, until one root remains in
-   HuffRoot[HIndex].  Finally the per-token codes and code lengths are
-   written to HuffCodeArray / HuffCodeLengthArray. */
-static void  BuildHuffmanTree( HUFF_ENTRY **HuffRoot,
-                        ogg_uint32_t *HuffCodeArray,
-                        unsigned char *HuffCodeLengthArray,
-                        ogg_uint32_t HIndex,
-                        const ogg_uint32_t *FreqList ){
-
-  CreateHuffmanList( HuffRoot, HIndex, FreqList );
-
-  /* Keep merging while at least two entries are left in the list. */
-  while ( HuffRoot[HIndex]->Next != NULL ){
-    HUFF_ENTRY *lo = HuffRoot[HIndex];
-    HUFF_ENTRY *hi = lo->Next;
-    HUFF_ENTRY *rest = hi->Next;
-    HUFF_ENTRY *parent = (HUFF_ENTRY *)_ogg_calloc(1,sizeof(*parent));
-    HUFF_ENTRY *cursor;
-
-    parent->Value = -1;
-    parent->Frequency = lo->Frequency + hi->Frequency ;
-    parent->ZeroChild = lo;
-    parent->OneChild = hi;
-
-    /* The merged pair leaves the list; drop its list links. */
-    lo->Next = NULL;
-    lo->Previous = NULL;
-    hi->Next = NULL;
-    hi->Previous = NULL;
-
-    if ( rest == NULL ){
-      /* Nothing else is left: the parent is the finished root. */
-      parent->Next = NULL;
-      parent->Previous = NULL;
-      HuffRoot[HIndex] = parent;
-      continue;
-    }
-
-    /* The remainder of the list now starts at 'rest'. */
-    rest->Previous = NULL;
-
-    if ( parent->Frequency <= rest->Frequency ){
-      /* Parent sorts first and becomes the new head. */
-      parent->Next = rest;
-      rest->Previous = parent;
-      parent->Previous = NULL;
-      HuffRoot[HIndex] = parent;
-      continue;
-    }
-
-    /* Otherwise 'rest' stays the head; find the parent's slot behind it. */
-    HuffRoot[HIndex] = rest;
-    cursor = rest;
-    while ( (cursor->Next != NULL) &&
-            (cursor->Frequency < parent->Frequency) ){
-      cursor = cursor->Next;
-    }
-
-    if ( cursor->Frequency < parent->Frequency ){
-      /* Larger than everything left: append at the tail. */
-      parent->Next = NULL;
-      parent->Previous = cursor;
-      cursor->Next = parent;
-    }else{
-      /* Splice in just ahead of the cursor. */
-      parent->Next = cursor;
-      parent->Previous = cursor->Previous;
-      cursor->Previous->Next = parent;
-      cursor->Previous = parent;
-    }
-  }
-
-  /* Now build a code array from the tree. */
-  CreateCodeArray( HuffRoot[HIndex], HuffCodeArray,
-                   HuffCodeLengthArray, 0, 0);
-}
-
-/* Release root_ptr and everything below it (children first).
-   A NULL pointer is accepted and ignored. */
-static void  DestroyHuffTree(HUFF_ENTRY *root_ptr){
-  if (root_ptr == NULL) return;
-
-  DestroyHuffTree(root_ptr->ZeroChild);
-  DestroyHuffTree(root_ptr->OneChild);
-  _ogg_free(root_ptr);
-}
-
-/* Release everything InitHuffmanSet() attaches to pbi: the Huffman trees
-   in HuffRoot_VP3x plus the per-table code and code-length arrays.
-   Each array slot is reset to NULL once freed, so calling this function
-   again (or on an instance that was only partly set up) does not free
-   the same memory twice. */
-void ClearHuffmanSet( PB_INSTANCE *pbi ){
-  int i;
-
-  /* Frees the trees and NULLs the root slots. */
-  ClearHuffmanTrees(pbi->HuffRoot_VP3x);
-
-  for ( i = 0; i < NUM_HUFF_TABLES; i++ ){
-    if (pbi->HuffCodeArray_VP3x[i]){
-      _ogg_free (pbi->HuffCodeArray_VP3x[i]);
-      pbi->HuffCodeArray_VP3x[i] = NULL;
-    }
-
-    if (pbi->HuffCodeLengthArray_VP3x[i]){
-      _ogg_free (pbi->HuffCodeLengthArray_VP3x[i]);
-      pbi->HuffCodeLengthArray_VP3x[i] = NULL;
-    }
-  }
-}
-
-/* (Re)create the built-in Huffman set on pbi: any previous set is
-   cleared, then for each of the NUM_HUFF_TABLES tables a code array and
-   a code-length array of MAX_ENTROPY_TOKENS entries are allocated and
-   filled by building the tree from FrequencyCounts_VP3[table]. */
-void InitHuffmanSet( PB_INSTANCE *pbi ){
-  int table;
-
-  ClearHuffmanSet(pbi);
-
-  pbi->ExtraBitLengths_VP3x = ExtraBitLengths_VP31;
-
-  table = 0;
-  while ( table < NUM_HUFF_TABLES ){
-    /* Zero-filled output arrays, one entry per token. */
-    pbi->HuffCodeArray_VP3x[table] =
-      _ogg_calloc(MAX_ENTROPY_TOKENS,
-                  sizeof(*pbi->HuffCodeArray_VP3x[table]));
-    pbi->HuffCodeLengthArray_VP3x[table] =
-      _ogg_calloc(MAX_ENTROPY_TOKENS,
-                  sizeof(*pbi->HuffCodeLengthArray_VP3x[table]));
-
-    BuildHuffmanTree( pbi->HuffRoot_VP3x,
-                      pbi->HuffCodeArray_VP3x[table],
-                      pbi->HuffCodeLengthArray_VP3x[table],
-                      table, FrequencyCounts_VP3[table]);
-    table++;
-  }
-}
-
-/* Read one Huffman (sub)tree from opb into the already allocated node
-   HuffRoot.  A 1 bit marks a leaf and is followed by a 5-bit token
-   value; a 0 bit marks an interior node (Value == -1) whose zero and
-   one children follow recursively.
-   Returns 0 on success, or a negative code: OC_BADHEADER when a read
-   yields a negative value or the nesting goes deeper than 32 levels,
-   otherwise whatever a failed child read returned. */
-static int ReadHuffTree(HUFF_ENTRY * HuffRoot, int depth,
-                        oggpack_buffer *opb) {
-  long flag;
-  long token;
-  int status;
-
-  theora_read(opb,1,&flag);
-  if(flag < 0) return OC_BADHEADER;
-
-  if(flag) {
-    /* Leaf node. */
-    HuffRoot->ZeroChild = NULL;
-    HuffRoot->OneChild = NULL;
-    theora_read(opb,5,&token);
-    HuffRoot->Value=token;
-    if (HuffRoot->Value < 0) return OC_BADHEADER;
-    return 0;
-  }
-
-  /* Interior node: refuse to nest deeper than 32 levels. */
-  depth++;
-  if (depth > 32) return OC_BADHEADER;
-
-  HuffRoot->ZeroChild = (HUFF_ENTRY *)_ogg_calloc(1, sizeof(HUFF_ENTRY));
-  status = ReadHuffTree(HuffRoot->ZeroChild, depth, opb);
-  if (status < 0) return status;
-
-  HuffRoot->OneChild = (HUFF_ENTRY *)_ogg_calloc(1, sizeof(HUFF_ENTRY));
-  status = ReadHuffTree(HuffRoot->OneChild, depth, opb);
-  if (status < 0) return status;
-
-  HuffRoot->Value = -1;
-  return 0;
-}
-
-/* Read NUM_HUFF_TABLES Huffman trees from opb into ci->HuffRoot[].
-   A fresh root node is allocated for each table before it is parsed.
-   Returns 0 when every table was read, otherwise the first non-zero
-   result from ReadHuffTree (later tables are then left untouched). */
-int ReadHuffmanTrees(codec_setup_info *ci, oggpack_buffer *opb) {
-  int table = 0;
-
-  while (table < NUM_HUFF_TABLES) {
-    int status;
-
-    ci->HuffRoot[table] = (HUFF_ENTRY *)_ogg_calloc(1, sizeof(HUFF_ENTRY));
-    status = ReadHuffTree(ci->HuffRoot[table], 0, opb);
-    if (status != 0) return status;
-    table++;
-  }
-  return 0;
-}
-
-/* Serialise the tree below HuffRoot to opb in pre-order: an interior
-   node (negative Value) is a 0 bit followed by its zero and one
-   subtrees; a leaf is a 1 bit followed by its Value in 5 bits. */
-static void WriteHuffTree(HUFF_ENTRY *HuffRoot, oggpack_buffer *opb) {
-  if (HuffRoot->Value < 0) {
-    oggpackB_write(opb, 0, 1);
-    WriteHuffTree(HuffRoot->ZeroChild, opb);
-    WriteHuffTree(HuffRoot->OneChild, opb);
-    return;
-  }
-
-  oggpackB_write(opb, 1, 1);
-  oggpackB_write(opb, HuffRoot->Value, 5);
-}
-
-/* Write all NUM_HUFF_TABLES trees in HuffRoot[] to opb, in index order. */
-void WriteHuffmanTrees(HUFF_ENTRY *HuffRoot[NUM_HUFF_TABLES],
-                       oggpack_buffer *opb) {
-  int table = 0;
-
-  while (table < NUM_HUFF_TABLES) {
-    WriteHuffTree(HuffRoot[table], opb);
-    table++;
-  }
-}
-
-/* Return a newly allocated copy of the tree below HuffSrc, or NULL when
-   HuffSrc is NULL.  Only Value and the child links are copied; children
-   are followed only for interior nodes (negative Value), so a leaf copy
-   keeps the zeroed child pointers it was allocated with. */
-static HUFF_ENTRY *CopyHuffTree(const HUFF_ENTRY *HuffSrc) {
-  HUFF_ENTRY *clone;
-
-  if(HuffSrc == NULL) return NULL;
-
-  clone = (HUFF_ENTRY *)_ogg_calloc(1, sizeof(HUFF_ENTRY));
-  clone->Value = HuffSrc->Value;
-
-  /* Leaves have nothing below them to copy. */
-  if (HuffSrc->Value >= 0) return clone;
-
-  clone->ZeroChild = CopyHuffTree(HuffSrc->ZeroChild);
-  clone->OneChild = CopyHuffTree(HuffSrc->OneChild);
-  return clone;
-}
-
-/* Give pbi its own copy of every Huffman tree stored in ci->HuffRoot[],
-   and point its extra-bit-length table at ExtraBitLengths_VP31.
-   Whatever pbi->HuffRoot_VP3x[] held before is overwritten, not freed. */
-void InitHuffmanTrees(PB_INSTANCE *pbi, const codec_setup_info *ci) {
-  int table = 0;
-
-  pbi->ExtraBitLengths_VP3x = ExtraBitLengths_VP31;
-
-  while (table < NUM_HUFF_TABLES) {
-    pbi->HuffRoot_VP3x[table] = CopyHuffTree(ci->HuffRoot[table]);
-    table++;
-  }
-}
-
-/* Free each of the NUM_HUFF_TABLES trees in HuffRoot[] and reset the
-   slot to NULL, so the array can safely be cleared again later. */
-void  ClearHuffmanTrees(HUFF_ENTRY *HuffRoot[NUM_HUFF_TABLES]){
-  HUFF_ENTRY **slot = HuffRoot;
-  HUFF_ENTRY **end = HuffRoot + NUM_HUFF_TABLES;
-
-  while (slot != end) {
-    DestroyHuffTree(*slot);
-    *slot = NULL;
-    slot++;
-  }
-}

+ 0 - 74
Engine/lib/libtheora/lib/enc/encoder_huffman.h

@@ -1,74 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: encoder_huffman.h 13884 2007-09-22 08:38:10Z giles $
-
- ********************************************************************/
-
-/********************************************************************
- *  Constants
- ********************************************************************/
-#define NUM_HUFF_TABLES         80
-#define DC_HUFF_OFFSET          0
-#define AC_HUFF_OFFSET          16
-#define AC_TABLE_2_THRESH       5
-#define AC_TABLE_3_THRESH       14
-#define AC_TABLE_4_THRESH       27
-
-#define DC_HUFF_CHOICES         16
-#define DC_HUFF_CHOICE_BITS     4
-
-#define AC_HUFF_CHOICES         16
-#define AC_HUFF_CHOICE_BITS     4
-
-/* Constants associated with entropy tokenisation. */
-#define MAX_SINGLE_TOKEN_VALUE  6
-#define DCT_VAL_CAT2_MIN        3
-#define DCT_VAL_CAT3_MIN        7
-#define DCT_VAL_CAT4_MIN        9
-#define DCT_VAL_CAT5_MIN        13
-#define DCT_VAL_CAT6_MIN        21
-#define DCT_VAL_CAT7_MIN        37
-#define DCT_VAL_CAT8_MIN        69
-
-#define DCT_EOB_TOKEN           0
-#define DCT_EOB_PAIR_TOKEN      1
-#define DCT_EOB_TRIPLE_TOKEN    2
-#define DCT_REPEAT_RUN_TOKEN    3
-#define DCT_REPEAT_RUN2_TOKEN   4
-#define DCT_REPEAT_RUN3_TOKEN   5
-#define DCT_REPEAT_RUN4_TOKEN   6
-
-#define DCT_SHORT_ZRL_TOKEN     7
-#define DCT_ZRL_TOKEN           8
-
-#define ONE_TOKEN               9       /* Special tokens for -1,1,-2,2 */
-#define MINUS_ONE_TOKEN         10
-#define TWO_TOKEN               11
-#define MINUS_TWO_TOKEN         12
-
-#define LOW_VAL_TOKENS          (MINUS_TWO_TOKEN + 1)      /* = 13 */
-#define DCT_VAL_CATEGORY3       (LOW_VAL_TOKENS + 4)       /* = 17 */
-#define DCT_VAL_CATEGORY4       (DCT_VAL_CATEGORY3 + 1)    /* = 18 */
-#define DCT_VAL_CATEGORY5       (DCT_VAL_CATEGORY4 + 1)    /* = 19 */
-#define DCT_VAL_CATEGORY6       (DCT_VAL_CATEGORY5 + 1)    /* = 20 */
-#define DCT_VAL_CATEGORY7       (DCT_VAL_CATEGORY6 + 1)    /* = 21 */
-#define DCT_VAL_CATEGORY8       (DCT_VAL_CATEGORY7 + 1)    /* = 22 */
-
-#define DCT_RUN_CATEGORY1       (DCT_VAL_CATEGORY8 + 1)    /* = 23 */
-#define DCT_RUN_CATEGORY1B      (DCT_RUN_CATEGORY1 + 5)    /* = 28 */
-#define DCT_RUN_CATEGORY1C      (DCT_RUN_CATEGORY1B + 1)   /* = 29 */
-#define DCT_RUN_CATEGORY2       (DCT_RUN_CATEGORY1C + 1)   /* = 30 */
-
-/* Total number of entropy tokens; the expression below evaluates to 32,
-   which matches the 5-bit token value stored in a Huffman tree leaf. */
-#define MAX_ENTROPY_TOKENS      (DCT_RUN_CATEGORY2 + 2)

+ 0 - 572
Engine/lib/libtheora/lib/enc/encoder_idct.c

@@ -1,572 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function: C implementation of the Theora iDCT
-  last mod: $Id: encoder_idct.c 14714 2008-04-12 01:04:43Z giles $
-
- ********************************************************************/
-
-#include <string.h>
-#include "codec_internal.h"
-
-#include "quant_lookup.h"
-
-#define IdctAdjustBeforeShift 8 /* rounding bias added ahead of the >> 4 in the column pass */
-/* Cosines scaled by 65536: xCnSm = round(65536 * cos(n*pi/16)),
-   which equals round(65536 * sin(m*pi/16)) with m = 8 - n. */
-#define xC1S7 64277
-#define xC2S6 60547
-#define xC3S5 54491
-#define xC4S4 46341
-#define xC5S3 36410
-#define xC6S2 25080
-#define xC7S1 12785
-
-/* compute the 16 bit signed 1D inverse DCT - spec version */
-/*
-static void idct_short__c ( ogg_int16_t * InputData, ogg_int16_t * OutputData ) {
-  ogg_int32_t t[8], r;
-  ogg_int16_t *y = InputData;
-  ogg_int16_t *x = OutputData;
-
-  t[0] = y[0] + y[4];
-  t[0] &= 0xffff;
-  t[0] = (xC4S4 * t[0]) >> 16;
-
-  t[1] = y[0] - y[4];
-  t[1] &= 0xffff;
-  t[1] = (xC4S4 * t[1]) >> 16;
-
-  t[2] = ((xC6S2 * y[2]) >> 16) - ((xC2S6 * y[6]) >> 16);
-  t[3] = ((xC2S6 * y[2]) >> 16) + ((xC6S2 * y[6]) >> 16);
-  t[4] = ((xC7S1 * y[1]) >> 16) - ((xC1S7 * y[7]) >> 16);
-  t[5] = ((xC3S5 * y[5]) >> 16) - ((xC5S3 * y[3]) >> 16);
-  t[6] = ((xC5S3 * y[5]) >> 16) + ((xC3S5 * y[3]) >> 16);
-  t[7] = ((xC1S7 * y[1]) >> 16) + ((xC7S1 * y[7]) >> 16);
-
-  r = t[4] + t[5];
-  t[5] = t[4] - t[5];
-  t[5] &= 0xffff;
-  t[5] = (xC4S4 * (-t[5])) >> 16;
-  t[4] = r;
-
-  r = t[7] + t[6];
-  t[6] = t[7] - t[6];
-  t[6] &= 0xffff;
-  t[6] = (xC4S4 * t[6]) >> 16;
-  t[7] = r;
-
-  r = t[0] + t[3];
-  t[3] = t[0] - t[3];
-  t[0] = r;
-
-  r = t[1] + t[2];
-  t[2] = t[1] - t[2];
-  t[1] = r;
-
-  r = t[6] + t[5];
-  t[5] = t[6] - t[5];
-  t[6] = r;
-
-  r = t[0] + t[7];
-  r &= 0xffff;
-  x[0] = r;
-
-  r = t[1] + t[6];
-  r &= 0xffff;
-  x[1] = r;
-
-  r = t[2] + t[5];
-  r &= 0xffff;
-  x[2] = r;
-
-  r = t[3] + t[4];
-  r &= 0xffff;
-  x[3] = r;
-
-  r = t[3] - t[4];
-  r &= 0xffff;
-  x[4] = r;
-
-  r = t[2] - t[5];
-  r &= 0xffff;
-  x[5] = r;
-
-  r = t[1] - t[6];
-  r &= 0xffff;
-  x[6] = r;
-
-  r = t[0] - t[7];
-  r &= 0xffff;
-  x[7] = r;
-
-}
-*/
-
-/* Dequantise a full block of 64 coefficients: entry k of quantized_list
-   is multiplied by entry k of dequant_coeffs and the product is stored
-   in DCT_block at position dezigzag_index[k]. */
-static void dequant_slow( ogg_int16_t * dequant_coeffs,
-                   ogg_int16_t * quantized_list,
-                   ogg_int32_t * DCT_block) {
-  int zz = 0;
-
-  while ( zz < 64 ) {
-    DCT_block[dezigzag_index[zz]] = quantized_list[zz] * dequant_coeffs[zz];
-    zz++;
-  }
-}
-
-
-/* Full 8x8 inverse DCT of one block.
-   InputData (quantised coefficients) is first dequantised with
-   QuantMatrix into a local 64-entry 32-bit buffer by dequant_slow().
-   Pass 1 transforms each of the 8 rows in place; pass 2 transforms each
-   of the 8 columns, adds IdctAdjustBeforeShift, shifts right by 4 and
-   stores the 64 results in OutputData.  Multiplications by the xC*S*
-   constants are followed by >> 16. */
-void IDctSlow__c(  Q_LIST_ENTRY * InputData,
-                ogg_int16_t *QuantMatrix,
-                ogg_int16_t * OutputData ) {
-  ogg_int32_t IntermediateData[64];
-  ogg_int32_t * ip = IntermediateData;
-  ogg_int16_t * op = OutputData;
-  /* Butterfly temporaries shared by both passes. */
-  ogg_int32_t _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
-  ogg_int32_t _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
-  ogg_int32_t t1, t2;
-
-  int loop;
-
-  dequant_slow( QuantMatrix, InputData, IntermediateData);
-
-  /* Inverse DCT on the rows now: ip walks IntermediateData one row
-     (8 entries) per iteration. */
-  for ( loop = 0; loop < 8; loop++){
-    /* Check for non-zero values; a row whose 8 inputs are all zero is
-       skipped and left as it is. */
-    if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) {
-      t1 = (xC1S7 * ip[1]);
-      t2 = (xC7S1 * ip[7]);
-      t1 >>= 16;
-      t2 >>= 16;
-      _A = t1 + t2;
-
-      t1 = (xC7S1 * ip[1]);
-      t2 = (xC1S7 * ip[7]);
-      t1 >>= 16;
-      t2 >>= 16;
-      _B = t1 - t2;
-
-      t1 = (xC3S5 * ip[3]);
-      t2 = (xC5S3 * ip[5]);
-      t1 >>= 16;
-      t2 >>= 16;
-      _C = t1 + t2;
-
-      t1 = (xC3S5 * ip[5]);
-      t2 = (xC5S3 * ip[3]);
-      t1 >>= 16;
-      t2 >>= 16;
-      _D = t1 - t2;
-      /* The differences below are cut to 16 bits before the xC4S4 multiply. */
-      t1 = (xC4S4 * (ogg_int16_t)(_A - _C));
-      t1 >>= 16;
-      _Ad = t1;
-
-      t1 = (xC4S4 * (ogg_int16_t)(_B - _D));
-      t1 >>= 16;
-      _Bd = t1;
-
-
-      _Cd = _A + _C;
-      _Dd = _B + _D;
-
-      t1 = (xC4S4 * (ogg_int16_t)(ip[0] + ip[4]));
-      t1 >>= 16;
-      _E = t1;
-
-      t1 = (xC4S4 * (ogg_int16_t)(ip[0] - ip[4]));
-      t1 >>= 16;
-      _F = t1;
-
-      t1 = (xC2S6 * ip[2]);
-      t2 = (xC6S2 * ip[6]);
-      t1 >>= 16;
-      t2 >>= 16;
-      _G = t1 + t2;
-
-      t1 = (xC6S2 * ip[2]);
-      t2 = (xC2S6 * ip[6]);
-      t1 >>= 16;
-      t2 >>= 16;
-      _H = t1 - t2;
-
-
-      _Ed = _E - _G;
-      _Gd = _E + _G;
-
-      _Add = _F + _Ad;
-      _Bdd = _Bd - _H;
-
-      _Fd = _F - _Ad;
-      _Hd = _Bd + _H;
-
-      /* Final sequence of operations over-write original inputs.
-         Row results are cut to 16 bits and stored back unscaled (>> 0). */
-      ip[0] = (ogg_int16_t)((_Gd + _Cd )   >> 0);
-      ip[7] = (ogg_int16_t)((_Gd - _Cd )   >> 0);
-
-      ip[1] = (ogg_int16_t)((_Add + _Hd )  >> 0);
-      ip[2] = (ogg_int16_t)((_Add - _Hd )  >> 0);
-
-      ip[3] = (ogg_int16_t)((_Ed + _Dd )   >> 0);
-      ip[4] = (ogg_int16_t)((_Ed - _Dd )   >> 0);
-
-      ip[5] = (ogg_int16_t)((_Fd + _Bdd )  >> 0);
-      ip[6] = (ogg_int16_t)((_Fd - _Bdd )  >> 0);
-
-    }
-
-    ip += 8;                    /* next row */
-  }
-
-  ip = IntermediateData;        /* rewind for the column pass */
-
-  for ( loop = 0; loop < 8; loop++){
-    /* Check for non-zero values (bitwise or faster than ||).
-       Column elements are 8 entries apart in both ip and op. */
-    if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
-         ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
-
-      t1 = (xC1S7 * ip[1*8]);
-      t2 = (xC7S1 * ip[7*8]);
-      t1 >>= 16;
-      t2 >>= 16;
-      _A = t1 + t2;
-
-      t1 = (xC7S1 * ip[1*8]);
-      t2 = (xC1S7 * ip[7*8]);
-      t1 >>= 16;
-      t2 >>= 16;
-      _B = t1 - t2;
-
-      t1 = (xC3S5 * ip[3*8]);
-      t2 = (xC5S3 * ip[5*8]);
-      t1 >>= 16;
-      t2 >>= 16;
-      _C = t1 + t2;
-
-      t1 = (xC3S5 * ip[5*8]);
-      t2 = (xC5S3 * ip[3*8]);
-      t1 >>= 16;
-      t2 >>= 16;
-      _D = t1 - t2;
-
-      t1 = (xC4S4 * (ogg_int16_t)(_A - _C));
-      t1 >>= 16;
-      _Ad = t1;
-
-      t1 = (xC4S4 * (ogg_int16_t)(_B - _D));
-      t1 >>= 16;
-      _Bd = t1;
-
-
-      _Cd = _A + _C;
-      _Dd = _B + _D;
-
-      t1 = (xC4S4 * (ogg_int16_t)(ip[0*8] + ip[4*8]));
-      t1 >>= 16;
-      _E = t1;
-
-      t1 = (xC4S4 * (ogg_int16_t)(ip[0*8] - ip[4*8]));
-      t1 >>= 16;
-      _F = t1;
-
-      t1 = (xC2S6 * ip[2*8]);
-      t2 = (xC6S2 * ip[6*8]);
-      t1 >>= 16;
-      t2 >>= 16;
-      _G = t1 + t2;
-
-      t1 = (xC6S2 * ip[2*8]);
-      t2 = (xC2S6 * ip[6*8]);
-      t1 >>= 16;
-      t2 >>= 16;
-      _H = t1 - t2;
-
-      _Ed = _E - _G;
-      _Gd = _E + _G;
-
-      _Add = _F + _Ad;
-      _Bdd = _Bd - _H;
-
-      _Fd = _F - _Ad;
-      _Hd = _Bd + _H;
-      /* Each output below uses exactly one of these four terms, so every
-         output receives the rounding bias once before its >> 4. */
-      _Gd += IdctAdjustBeforeShift;
-      _Add += IdctAdjustBeforeShift;
-      _Ed += IdctAdjustBeforeShift;
-      _Fd += IdctAdjustBeforeShift;
-
-      /* Final sequence of operations: results go to the output block,
-         scaled down by >> 4 and cut to 16 bits. */
-      op[0*8] = (ogg_int16_t)((_Gd + _Cd )   >> 4);
-      op[7*8] = (ogg_int16_t)((_Gd - _Cd )   >> 4);
-
-      op[1*8] = (ogg_int16_t)((_Add + _Hd )  >> 4);
-      op[2*8] = (ogg_int16_t)((_Add - _Hd )  >> 4);
-
-      op[3*8] = (ogg_int16_t)((_Ed + _Dd )   >> 4);
-      op[4*8] = (ogg_int16_t)((_Ed - _Dd )   >> 4);
-
-      op[5*8] = (ogg_int16_t)((_Fd + _Bdd )  >> 4);
-      op[6*8] = (ogg_int16_t)((_Fd - _Bdd )  >> 4);
-    }else{                      /* all-zero column: write zeros */
-      op[0*8] = 0;
-      op[7*8] = 0;
-      op[1*8] = 0;
-      op[2*8] = 0;
-      op[3*8] = 0;
-      op[4*8] = 0;
-      op[5*8] = 0;
-      op[6*8] = 0;
-    }
-
-    ip++;                       /* next column */
-    op++;
-  }
-}
-
-/************************
-  x  x  x  x  0  0  0  0
-  x  x  x  0  0  0  0  0
-  x  x  0  0  0  0  0  0
-  x  0  0  0  0  0  0  0
-  0  0  0  0  0  0  0  0
-  0  0  0  0  0  0  0  0
-  0  0  0  0  0  0  0  0
-  0  0  0  0  0  0  0  0
-*************************/
-
-static void dequant_slow10( ogg_int16_t * dequant_coeffs,
-                     ogg_int16_t * quantized_list,
-                     ogg_int32_t * DCT_block){ /* Dequantise the first 10
-     entries of quantized_list: entry i is multiplied by dequant_coeffs[i]
-     and stored at DCT_block[dezigzag_index[i]]. */
-  int i;
-  memset(DCT_block,0, 128); /* Clears 128 bytes only, not a full 64-entry
-     block. NOTE(review): presumably the first four 8-entry rows of 32-bit
-     values, the only rows the caller IDct10__c reads -- confirm that
-     dezigzag_index[0..9] never lands outside them. */
-  for(i=0;i<10;i++)
-    DCT_block[dezigzag_index[i]] = quantized_list[i] * dequant_coeffs[i];
-
-}
-
-void IDct10__c( Q_LIST_ENTRY * InputData,
-             ogg_int16_t *QuantMatrix,
-             ogg_int16_t * OutputData ){ /* Reduced 8x8 inverse DCT for
-     blocks whose only non-zero coefficients are the first 10 handed to
-     dequant_slow10. InputData is dequantised with QuantMatrix into a local
-     32-bit work buffer, a row pass is run over the first four rows, then a
-     column pass writes all 64 results to OutputData. */
-  ogg_int32_t IntermediateData[64];
-  ogg_int32_t * ip = IntermediateData;
-  ogg_int16_t * op = OutputData;
-
-  ogg_int32_t _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
-  ogg_int32_t _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
-  ogg_int32_t t1, t2;
-
-  int loop;
-
-  dequant_slow10( QuantMatrix, InputData, IntermediateData);
-
-  /* Inverse DCT on the rows now */
-  for ( loop = 0; loop < 4; loop++){ /* only rows 0..3 are processed */
-    /* Check for non-zero values */
-    if ( ip[0] | ip[1] | ip[2] | ip[3] ){ /* only inputs 0..3 of a row are read */
-      t1 = (xC1S7 * ip[1]);
-      t1 >>= 16;
-      _A = t1;
-
-      t1 = (xC7S1 * ip[1]);
-      t1 >>= 16;
-      _B = t1 ;
-
-      t1 = (xC3S5 * ip[3]);
-      t1 >>= 16;
-      _C = t1;
-
-      t2 = (xC5S3 * ip[3]);
-      t2 >>= 16;
-      _D = -t2;
-
-
-      t1 = (xC4S4 * (ogg_int16_t)(_A - _C));
-      t1 >>= 16;
-      _Ad = t1;
-
-      t1 = (xC4S4 * (ogg_int16_t)(_B - _D));
-      t1 >>= 16;
-      _Bd = t1;
-
-
-      _Cd = _A + _C;
-      _Dd = _B + _D;
-
-      t1 = (xC4S4 * ip[0] );
-      t1 >>= 16;
-      _E = t1;
-
-      _F = t1; /* same value as _E: no ip[4] term is used in this variant */
-
-      t1 = (xC2S6 * ip[2]);
-      t1 >>= 16;
-      _G = t1;
-
-      t1 = (xC6S2 * ip[2]);
-      t1 >>= 16;
-      _H = t1 ;
-
-
-      _Ed = _E - _G;
-      _Gd = _E + _G;
-
-      _Add = _F + _Ad;
-      _Bdd = _Bd - _H;
-
-      _Fd = _F - _Ad;
-      _Hd = _Bd + _H;
-
-      /* Final sequence of operations over-write original inputs. */
-      ip[0] = (ogg_int16_t)((_Gd + _Cd )   >> 0);
-      ip[7] = (ogg_int16_t)((_Gd - _Cd )   >> 0);
-
-      ip[1] = (ogg_int16_t)((_Add + _Hd )  >> 0);
-      ip[2] = (ogg_int16_t)((_Add - _Hd )  >> 0);
-
-      ip[3] = (ogg_int16_t)((_Ed + _Dd )   >> 0);
-      ip[4] = (ogg_int16_t)((_Ed - _Dd )   >> 0);
-
-      ip[5] = (ogg_int16_t)((_Fd + _Bdd )  >> 0);
-      ip[6] = (ogg_int16_t)((_Fd - _Bdd )  >> 0);
-
-    }
-
-    ip += 8;                    /* next row */
-  }
-
-  ip = IntermediateData; /* rewind to the start of the work buffer for the column pass */
-
-  for ( loop = 0; loop < 8; loop++) { /* all 8 columns are processed */
-    /* Check for non-zero values (bitwise or faster than ||) */
-    if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] ) { /* only rows 0..3 of a column are read */
-
-      t1 = (xC1S7 * ip[1*8]);
-      t1 >>= 16;
-      _A = t1 ;
-
-      t1 = (xC7S1 * ip[1*8]);
-      t1 >>= 16;
-      _B = t1 ;
-
-      t1 = (xC3S5 * ip[3*8]);
-      t1 >>= 16;
-      _C = t1 ;
-
-      t2 = (xC5S3 * ip[3*8]);
-      t2 >>= 16;
-      _D = - t2;
-
-
-      t1 = (xC4S4 * (ogg_int16_t)(_A - _C));
-      t1 >>= 16;
-      _Ad = t1;
-
-      t1 = (xC4S4 * (ogg_int16_t)(_B - _D));
-      t1 >>= 16;
-      _Bd = t1;
-
-
-      _Cd = _A + _C;
-      _Dd = _B + _D;
-
-      t1 = (xC4S4 * ip[0*8]);
-      t1 >>= 16;
-      _E = t1;
-      _F = t1; /* same value as _E: no ip[4*8] term is used in this variant */
-
-      t1 = (xC2S6 * ip[2*8]);
-      t1 >>= 16;
-      _G = t1;
-
-      t1 = (xC6S2 * ip[2*8]);
-      t1 >>= 16;
-      _H = t1;
-
-
-      _Ed = _E - _G;
-      _Gd = _E + _G;
-
-      _Add = _F + _Ad;
-      _Bdd = _Bd - _H;
-
-      _Fd = _F - _Ad;
-      _Hd = _Bd + _H;
-
-      _Gd += IdctAdjustBeforeShift; /* bias added to the four terms below before the >> 4 */
-      _Add += IdctAdjustBeforeShift;
-      _Ed += IdctAdjustBeforeShift;
-      _Fd += IdctAdjustBeforeShift;
-
-      /* Final sequence of operations over-write original inputs. */
-      op[0*8] = (ogg_int16_t)((_Gd + _Cd )   >> 4);
-      op[7*8] = (ogg_int16_t)((_Gd - _Cd )   >> 4);
-
-      op[1*8] = (ogg_int16_t)((_Add + _Hd )  >> 4);
-      op[2*8] = (ogg_int16_t)((_Add - _Hd )  >> 4);
-
-      op[3*8] = (ogg_int16_t)((_Ed + _Dd )   >> 4);
-      op[4*8] = (ogg_int16_t)((_Ed - _Dd )   >> 4);
-
-      op[5*8] = (ogg_int16_t)((_Fd + _Bdd )  >> 4);
-      op[6*8] = (ogg_int16_t)((_Fd - _Bdd )  >> 4);
-    }else{ /* column has no non-zero input in rows 0..3: all 8 outputs are 0 */
-      op[0*8] = 0;
-      op[7*8] = 0;
-      op[1*8] = 0;
-      op[2*8] = 0;
-      op[3*8] = 0;
-      op[4*8] = 0;
-      op[5*8] = 0;
-      op[6*8] = 0;
-    }
-
-    ip++;                       /* next column */
-    op++;
-  }
-}
-
-/***************************
-  x   0   0  0  0  0  0  0
-  0   0   0  0  0  0  0  0
-  0   0   0  0  0  0  0  0
-  0   0   0  0  0  0  0  0
-  0   0   0  0  0  0  0  0
-  0   0   0  0  0  0  0  0
-  0   0   0  0  0  0  0  0
-  0   0   0  0  0  0  0  0
-**************************/
-
-void IDct1( Q_LIST_ENTRY * InputData,
-            ogg_int16_t *QuantMatrix,
-            ogg_int16_t * OutputData ){ /* Inverse transform for a block
-     with a single (first) coefficient: one value is computed from
-     InputData[0] and QuantMatrix[0] and copied to all 64 outputs. */
-  int loop;
-
-  ogg_int16_t  OutD;
-
-  OutD=(ogg_int16_t) ((ogg_int32_t)(InputData[0]*QuantMatrix[0]+15)>>5); /* dequantise, add 15, shift right by 5 */
-
-  for(loop=0;loop<64;loop++)
-    OutputData[loop]=OutD;
-
-}
-
-void dsp_idct_init (DspFunctions *funcs, ogg_uint32_t cpu_flags)
-{ /* Install the C inverse-DCT routines into funcs, then let the MMX
-     initialiser run when it is compiled in and cpu_flags reports MMX. */
-  funcs->IDctSlow = IDctSlow__c;
-  funcs->IDct10 = IDct10__c;
-  funcs->IDct3 = IDct10__c; /* the IDct3 slot also uses the 10-coefficient routine */
-#if defined(USE_ASM)
-  // todo: make mmx encoder idct for MSC one day...
-#if !defined (_MSC_VER)
-  if (cpu_flags & OC_CPU_X86_MMX) {
-    dsp_mmx_idct_init(funcs); /* NOTE(review): presumably replaces the pointers set above -- confirm */
-  }
-#endif
-#endif
-}

+ 0 - 120
Engine/lib/libtheora/lib/enc/encoder_lookup.h

@@ -1,120 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function: simple static lookups for VP3 frame encoder
-  last mod: $Id: encoder_lookup.h 15323 2008-09-19 19:43:59Z giles $
-
- ********************************************************************/
-
-#include "codec_internal.h"
-
-static const ogg_uint32_t MvPattern[(MAX_MV_EXTENT * 2) + 1] = { /* 63
-     initialisers, with 0 at index 31. NOTE(review): presumably the code
-     bit patterns for motion vector components indexed by
-     value + MAX_MV_EXTENT, paired with MvBits -- no use is visible here,
-     confirm against the caller. */
-  0x000000ff, 0x000000fd, 0x000000fb, 0x000000f9,
-  0x000000f7, 0x000000f5, 0x000000f3, 0x000000f1,
-  0x000000ef, 0x000000ed, 0x000000eb, 0x000000e9,
-  0x000000e7, 0x000000e5, 0x000000e3, 0x000000e1,
-  0x0000006f, 0x0000006d, 0x0000006b, 0x00000069,
-  0x00000067, 0x00000065, 0x00000063, 0x00000061,
-  0x0000002f, 0x0000002d, 0x0000002b, 0x00000029,
-  0x00000009, 0x00000007, 0x00000002, 0x00000000,
-  0x00000001, 0x00000006, 0x00000008, 0x00000028,
-  0x0000002a, 0x0000002c, 0x0000002e, 0x00000060,
-  0x00000062, 0x00000064, 0x00000066, 0x00000068,
-  0x0000006a, 0x0000006c, 0x0000006e, 0x000000e0,
-  0x000000e2, 0x000000e4, 0x000000e6, 0x000000e8,
-  0x000000ea, 0x000000ec, 0x000000ee, 0x000000f0,
-  0x000000f2, 0x000000f4, 0x000000f6, 0x000000f8,
-  0x000000fa, 0x000000fc, 0x000000fe,
-};
-
-static const ogg_uint32_t MvBits[(MAX_MV_EXTENT * 2) + 1] = { /* 63
-     initialisers ranging from 3 to 8. NOTE(review): presumably the bit
-     length of the MvPattern entry at the same index -- confirm. */
-  8, 8, 8, 8, 8, 8, 8, 8,
-  8, 8, 8, 8, 8, 8, 8, 8,
-  7, 7, 7, 7, 7, 7, 7, 7,
-  6, 6, 6, 6, 4, 4, 3, 3,
-  3, 4, 4, 6, 6, 6, 6, 7,
-  7, 7, 7, 7, 7, 7, 7, 8,
-  8, 8, 8, 8, 8, 8, 8, 8,
-  8, 8, 8, 8, 8, 8, 8,
-};
-
-static const ogg_uint32_t MvPattern2[(MAX_MV_EXTENT * 2) + 1] = { /* 63
-     initialisers, with 0 at index 31; odd values before it and even
-     values after it. NOTE(review): presumably an alternative fixed-length
-     motion vector code paired with MvBits2 -- confirm against the caller. */
-  0x0000003f, 0x0000003d, 0x0000003b, 0x00000039,
-  0x00000037, 0x00000035, 0x00000033, 0x00000031,
-  0x0000002f, 0x0000002d, 0x0000002b, 0x00000029,
-  0x00000027, 0x00000025, 0x00000023, 0x00000021,
-  0x0000001f, 0x0000001d, 0x0000001b, 0x00000019,
-  0x00000017, 0x00000015, 0x00000013, 0x00000011,
-  0x0000000f, 0x0000000d, 0x0000000b, 0x00000009,
-  0x00000007, 0x00000005, 0x00000003, 0x00000000,
-  0x00000002, 0x00000004, 0x00000006, 0x00000008,
-  0x0000000a, 0x0000000c, 0x0000000e, 0x00000010,
-  0x00000012, 0x00000014, 0x00000016, 0x00000018,
-  0x0000001a, 0x0000001c, 0x0000001e, 0x00000020,
-  0x00000022, 0x00000024, 0x00000026, 0x00000028,
-  0x0000002a, 0x0000002c, 0x0000002e, 0x00000030,
-  0x00000032, 0x00000034, 0x00000036, 0x00000038,
-  0x0000003a, 0x0000003c, 0x0000003e,
-};
-
-static const ogg_uint32_t MvBits2[(MAX_MV_EXTENT * 2) + 1] = { /* 63
-     initialisers, all equal to 6. NOTE(review): presumably the bit length
-     of the MvPattern2 entry at the same index -- confirm. */
-  6, 6, 6, 6, 6, 6, 6, 6,
-  6, 6, 6, 6, 6, 6, 6, 6,
-  6, 6, 6, 6, 6, 6, 6, 6,
-  6, 6, 6, 6, 6, 6, 6, 6,
-  6, 6, 6, 6, 6, 6, 6, 6,
-  6, 6, 6, 6, 6, 6, 6, 6,
-  6, 6, 6, 6, 6, 6, 6, 6,
-  6, 6, 6, 6, 6, 6, 6,
-};
-
-static const ogg_uint32_t ModeBitPatterns[MAX_MODES] = { /* eight code values; NOTE(review): presumably indexed by a mode's rank in the chosen scheme -- confirm */
-  0x00, 0x02, 0x06, 0x0E, 0x1E, 0x3E, 0x7E, 0x7F };
-
-static const ogg_int32_t ModeBitLengths[MAX_MODES] =  { /* eight lengths, 1..7; NOTE(review): presumably the bit length of the ModeBitPatterns entry at the same index -- confirm */
-  1,    2,    3,    4,    5,    6,    7,    7 };
-
-static const unsigned char ModeSchemes[MODE_METHODS-2][MAX_MODES] =  { /* six
-     rows of eight entries, each row a permutation of 0..7.
-     NOTE(review): presumably row = coding scheme and entry = rank assigned
-     to each coding mode; the letter codes on the right are not defined in
-     this file -- confirm. */
-  /* Last Mv dominates */
-  { 3,    4,    2,    0,    1,    5,    6,    7 },    /* L P  M N I G GM 4 */
-  { 2,    4,    3,    0,    1,    5,    6,    7 },    /* L P  N M I G GM 4 */
-  { 3,    4,    1,    0,    2,    5,    6,    7 },    /* L M  P N I G GM 4 */
-  { 2,    4,    1,    0,    3,    5,    6,    7 },    /* L M  N P I G GM 4 */
-
-  /* No MV dominates */
-  { 0,    4,    3,    1,    2,    5,    6,    7 },    /* N L  P M I G GM 4 */
-  { 0,    5,    4,    2,    3,    1,    6,    7 },    /* N G  L P M I GM 4 */
-
-};
-
-
-static const ogg_uint32_t MvThreshTable[Q_TABLE_SIZE] = { /* 64 values
-     falling from 65 to 0. NOTE(review): presumably a per-quality-index
-     motion vector threshold -- no use is visible here, confirm. */
-  65, 65, 65, 65, 50, 50, 50, 50,
-  40, 40, 40, 40, 40, 40, 40, 40,
-  30, 30, 30, 30, 30, 30, 30, 30,
-  20, 20, 20, 20, 20, 20, 20, 20,
-  15, 15, 15, 15, 15, 15, 15, 15,
-  10, 10, 10, 10, 10, 10, 10, 10,
-  5,  5,  5,  5,  5,  5,  5,  5,
-  0,  0,  0,  0,  0,  0,  0,  0
-};
-
-static const ogg_uint32_t MVChangeFactorTable[Q_TABLE_SIZE] = { /* 64 values
-     rising from 11 to 15. NOTE(review): presumably a per-quality-index
-     factor used in motion vector decisions -- no use is visible here,
-     confirm. */
-  11, 11, 11, 11, 12, 12, 12, 12,
-  13, 13, 13, 13, 13, 13, 13, 13,
-  14, 14, 14, 14, 14, 14, 14, 14,
-  14, 14, 14, 14, 14, 14, 14, 14,
-  14, 14, 14, 14, 14, 14, 14, 14,
-  14, 14, 14, 14, 14, 14, 14, 14,
-  15, 15, 15, 15, 15, 15, 15, 15,
-  15, 15, 15, 15, 15, 15, 15, 15
-};

+ 0 - 558
Engine/lib/libtheora/lib/enc/encoder_quant.c

@@ -1,558 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2005                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: encoder_quant.c 15153 2008-08-04 18:37:55Z tterribe $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include <string.h>
-#include "codec_internal.h"
-#include "quant_lookup.h"
-
-#define OC_QUANT_MAX        (1024<<2) /* upper clamp applied to every quantiser value in InitQTables */
-static const unsigned DC_QUANT_MIN[2]={4<<2,8<<2}; /* lower clamp for the DC quantiser, indexed by qti */
-static const unsigned AC_QUANT_MIN[2]={2<<2,4<<2}; /* lower clamp for the AC quantisers, indexed by qti */
-#define OC_MAXI(_a,_b)      ((_a)<(_b)?(_b):(_a)) /* larger of the two; arguments may be evaluated twice */
-#define OC_MINI(_a,_b)      ((_a)>(_b)?(_b):(_a)) /* smaller of the two; arguments may be evaluated twice */
-#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c))) /* clamp _b to the range [_a,_c] */
-
-static int ilog(unsigned _v){ /* Returns how many right shifts by one bit
-     it takes for _v to reach zero, i.e. the position of its highest set
-     bit plus one; 0 when _v is 0. */
-  int ret;
-  for(ret=0;_v;ret++)_v>>=1;
-  return ret;
-}
-
-
-void WriteQTables(PB_INSTANCE *pbi,oggpack_buffer* _opb) { /* Serialises
-     pbi->quant_info into _opb with oggpackB_write: the loop filter limits,
-     the AC and DC scale tables, the de-duplicated list of base matrices,
-     and finally the quant ranges of each of the six (qti,pli) sets. */
-
-  th_quant_info *_qinfo = &pbi->quant_info;
-
-  const th_quant_ranges *qranges;
-  const th_quant_base   *base_mats[2*3*64]; /* unique base matrices, in first-seen order */
-  int                    indices[2][3][64]; /* per (qti,pli,qri): index into base_mats */
-  int                    nbase_mats;
-  int                    nbits;
-  int                    ci;
-  int                    qi;
-  int                    qri;
-  int                    qti;
-  int                    pli;
-  int                    qtj;
-  int                    plj;
-  int                    bmi;
-  int                    i;
-
-  /*Unlike the scale tables, we can't assume the maximum value will be in
-     index 0, so search for it here.*/
-  i=_qinfo->loop_filter_limits[0];
-  for(qi=1;qi<64;qi++)i=OC_MAXI(i,_qinfo->loop_filter_limits[qi]);
-  nbits=ilog(i);
-  oggpackB_write(_opb,nbits,3); /* field width first (3 bits), then the 64 limits */
-  for(qi=0;qi<64;qi++){
-    oggpackB_write(_opb,_qinfo->loop_filter_limits[qi],nbits);
-  }
-  /* 580 bits for VP3.*/
-  nbits=OC_MAXI(ilog(_qinfo->ac_scale[0]),1); /* width is taken from entry 0 only */
-  oggpackB_write(_opb,nbits-1,4);
-  for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->ac_scale[qi],nbits);
-  /* 516 bits for VP3.*/
-  nbits=OC_MAXI(ilog(_qinfo->dc_scale[0]),1); /* width is taken from entry 0 only */
-  oggpackB_write(_opb,nbits-1,4);
-  for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->dc_scale[qi],nbits);
-  /*Consolidate any duplicate base matrices.*/
-  nbase_mats=0;
-  for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
-    qranges=_qinfo->qi_ranges[qti]+pli;
-    for(qri=0;qri<=qranges->nranges;qri++){ /* nranges+1 base matrices per set */
-      for(bmi=0;;bmi++){
-        if(bmi>=nbase_mats){ /* not seen before: append it to the list */
-          base_mats[bmi]=qranges->base_matrices+qri;
-          indices[qti][pli][qri]=nbase_mats++;
-          break;
-        }
-        else if(memcmp(base_mats[bmi][0],qranges->base_matrices[qri],
-         sizeof(base_mats[bmi][0]))==0){ /* byte-identical to an earlier one: reuse its index */
-          indices[qti][pli][qri]=bmi;
-          break;
-        }
-      }
-    }
-  }
-  /*Write out the list of unique base matrices.
-    1545 bits for VP3 matrices.*/
-  oggpackB_write(_opb,nbase_mats-1,9);
-  for(bmi=0;bmi<nbase_mats;bmi++){
-    for(ci=0;ci<64;ci++)oggpackB_write(_opb,base_mats[bmi][0][ci],8);
-  }
-  /*Now store quant ranges and their associated indices into the base matrix
-     list.
-     46 bits for VP3 matrices.*/
-  nbits=ilog(nbase_mats-1);
-  for(i=0;i<6;i++){
-    qti=i/3;
-    pli=i%3;
-    qranges=_qinfo->qi_ranges[qti]+pli;
-    if(i>0){ /* every set after the first may be coded as a copy of an earlier one */
-      if(qti>0){
-        if(qranges->nranges==_qinfo->qi_ranges[qti-1][pli].nranges&&
-         memcmp(qranges->sizes,_qinfo->qi_ranges[qti-1][pli].sizes,
-         qranges->nranges*sizeof(qranges->sizes[0]))==0&&
-         memcmp(indices[qti][pli],indices[qti-1][pli],
-         (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){
-          oggpackB_write(_opb,1,2); /* same as this plane in the previous qti: 2-bit value 1 */
-          continue;
-        }
-      }
-      qtj=(i-1)/3;
-      plj=(i-1)%3;
-      if(qranges->nranges==_qinfo->qi_ranges[qtj][plj].nranges&&
-       memcmp(qranges->sizes,_qinfo->qi_ranges[qtj][plj].sizes,
-       qranges->nranges*sizeof(qranges->sizes[0]))==0&&
-       memcmp(indices[qti][pli],indices[qtj][plj],
-       (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){
-        oggpackB_write(_opb,0,1+(qti>0)); /* same as the previous set: value 0 in 1 bit (qti==0) or 2 bits */
-        continue;
-      }
-      oggpackB_write(_opb,1,1); /* no match: a 1 bit, then the explicit ranges below */
-    }
-    oggpackB_write(_opb,indices[qti][pli][0],nbits);
-    for(qi=qri=0;qi<63;qri++){ /* runs until the range sizes add up to 63 */
-      oggpackB_write(_opb,qranges->sizes[qri]-1,ilog(62-qi));
-      qi+=qranges->sizes[qri];
-      oggpackB_write(_opb,indices[qti][pli][qri+1],nbits);
-    }
-  }
-}
-
-/* a copied/reconciled version of derf's theora-exp code; redundancy
-   should be eliminated at some point.
-   Fills pbi->quant_tables[qti][pli][qi][0..63] for every coding mode,
-   plane and quality index from pbi->quant_info: each entry is a scale
-   factor times a base matrix value, divided by 100, shifted left by 2 and
-   clamped. Between the base matrices that bound a range, the matrix is
-   linearly interpolated with rounding. */
-void InitQTables( PB_INSTANCE *pbi ){
-  int            qti; /* coding mode: intra or inter */
-  int            pli; /* Y U V */
-  th_quant_info *qinfo = &pbi->quant_info;
-
-  pbi->QThreshTable = pbi->quant_info.ac_scale; /* aliases the AC scale table, no copy is made */
-
-  for(qti=0;qti<2;qti++){
-    for(pli=0;pli<3;pli++){
-      int qi;  /* quality index */
-      int qri; /* range iterator */
-
-      for(qi=0,qri=0; qri<=qinfo->qi_ranges[qti][pli].nranges; qri++){
-        th_quant_base base; /* working copy, overwritten by the interpolation below */
-
-        ogg_uint32_t      q;
-        int               qi_start;
-        int               qi_end;
-        int               ci;
-        memcpy(base,qinfo->qi_ranges[qti][pli].base_matrices[qri],
-               sizeof(base));
-
-        qi_start=qi;
-        if(qri==qinfo->qi_ranges[qti][pli].nranges)
-          qi_end=qi+1; /* past the last range: exactly one more quality index */
-        else
-          qi_end=qi+qinfo->qi_ranges[qti][pli].sizes[qri];
-
-        /* Iterate over quality indices in this range */
-        for(;;){
-
-          /*Scale the DC coefficient from the proper table.*/
-          q=((ogg_uint32_t)qinfo->dc_scale[qi]*base[0]/100)<<2;
-          q=OC_CLAMPI(DC_QUANT_MIN[qti],q,OC_QUANT_MAX);
-          pbi->quant_tables[qti][pli][qi][0]=(ogg_uint16_t)q;
-
-          /*Now scale AC coefficients from the proper table.*/
-          for(ci=1;ci<64;ci++){
-            q=((ogg_uint32_t)qinfo->ac_scale[qi]*base[ci]/100)<<2;
-            q=OC_CLAMPI(AC_QUANT_MIN[qti],q,OC_QUANT_MAX);
-            pbi->quant_tables[qti][pli][qi][ci]=(ogg_uint16_t)q;
-          }
-
-          if(++qi>=qi_end)break; /* leave before interpolating, so base_matrices[qri+1] is not read on the last step */
-
-          /*Interpolate the next base matrix.*/
-          for(ci=0;ci<64;ci++){
-            base[ci]=(unsigned char)
-              ((2*((qi_end-qi)*qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+
-                   (qi-qi_start)*qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci])
-                +qinfo->qi_ranges[qti][pli].sizes[qri])/
-               (2*qinfo->qi_ranges[qti][pli].sizes[qri]));
-          }
-        }
-      }
-    }
-  }
-}
-
-static void BuildZigZagIndex(PB_INSTANCE *pbi){ /* Builds pbi->zigzag_index as the inverse of dezigzag_index. */
-  ogg_int32_t i,j;
-
-  /* invert the row to zigzag coefficient order lookup table */
-  for ( i = 0; i < BLOCK_SIZE; i++ ){
-    j = dezigzag_index[i];
-    pbi->zigzag_index[j] = i; /* so that zigzag_index[dezigzag_index[i]] == i */
-  }
-}
-
-static void init_quantizer ( CP_INSTANCE *cpi,
-                             unsigned char QIndex ){ /* Builds the forward
-     quantiser tables in cpi->pb for quality index QIndex. For each of the
-     six (mode, plane) tables and each of the 64 coefficients it stores a
-     rounding offset, a zero-bin size and a reciprocal scaled by SHIFT16,
-     all derived from pbi->quant_tables[..][..][QIndex]. */
-  int i;
-  double ZBinFactor;
-  double RoundingFactor;
-
-  double temp_fp_quant_coeffs;
-  double temp_fp_quant_round;
-  double temp_fp_ZeroBinSize;
-  PB_INSTANCE *pbi = &cpi->pb;
-
-
-  const ogg_uint16_t * temp_Y_coeffs;
-  const ogg_uint16_t * temp_U_coeffs;
-  const ogg_uint16_t * temp_V_coeffs;
-  const ogg_uint16_t * temp_Inter_Y_coeffs;
-  const ogg_uint16_t * temp_Inter_U_coeffs;
-  const ogg_uint16_t * temp_Inter_V_coeffs;
-  ogg_uint16_t scale_factor = cpi->pb.quant_info.ac_scale[QIndex];
-
-  /* Notes on setup of quantisers.  The initial multiplication by
-     the scale factor is done in the ogg_int32_t domain to ensure that the
-     precision in the quantiser is the same as in the inverse
-     quantiser where all calculations are integer.  The "<< 2" is a
-     normalisation factor for the forward DCT transform. */
-
-  temp_Y_coeffs = pbi->quant_tables[0][0][QIndex];
-  temp_U_coeffs = pbi->quant_tables[0][1][QIndex];
-  temp_V_coeffs = pbi->quant_tables[0][2][QIndex];
-  temp_Inter_Y_coeffs = pbi->quant_tables[1][0][QIndex];
-  temp_Inter_U_coeffs = pbi->quant_tables[1][1][QIndex];
-  temp_Inter_V_coeffs = pbi->quant_tables[1][2][QIndex];
-
-  ZBinFactor = 0.9; /* overwritten by every branch of the switch below */
-
-  switch(cpi->pb.info.sharpness){ /* picks the zero-bin and rounding factors; every branch sets both */
-  case 0:
-    ZBinFactor = 0.65;
-    if ( scale_factor <= 50 )
-        RoundingFactor = 0.499;
-      else
-        RoundingFactor = 0.46;
-      break;
-    case 1:
-      ZBinFactor = 0.75;
-      if ( scale_factor <= 50 )
-        RoundingFactor = 0.476;
-      else
-        RoundingFactor = 0.400;
-      break;
-
-    default:
-      ZBinFactor = 0.9;
-      if ( scale_factor <= 50 )
-        RoundingFactor = 0.476;
-      else
-        RoundingFactor = 0.333;
-      break;
-    }
-
-    /* Use fixed multiplier for intra Y DC */
-    temp_fp_quant_coeffs = temp_Y_coeffs[0];
-    temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
-    pbi->fp_quant_Y_round[0]    = (ogg_int32_t) (0.5 + temp_fp_quant_round);
-    temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
-    pbi->fp_ZeroBinSize_Y[0]    = (ogg_int32_t) (0.5 + temp_fp_ZeroBinSize);
-    temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs; /* from here on the variable holds the reciprocal */
-    pbi->fp_quant_Y_coeffs[0]   = (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
-    /* Intra U */
-    temp_fp_quant_coeffs = temp_U_coeffs[0];
-    temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
-    pbi->fp_quant_U_round[0]   = (0.5 + temp_fp_quant_round);
-    temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
-    pbi->fp_ZeroBinSize_U[0]   = (0.5 + temp_fp_ZeroBinSize);
-    temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
-    pbi->fp_quant_U_coeffs[0]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
-    /* Intra V */
-    temp_fp_quant_coeffs = temp_V_coeffs[0];
-    temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
-    pbi->fp_quant_V_round[0]   = (0.5 + temp_fp_quant_round);
-    temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
-    pbi->fp_ZeroBinSize_V[0]   = (0.5 + temp_fp_ZeroBinSize);
-    temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
-    pbi->fp_quant_V_coeffs[0]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
-
-    /* Inter Y */
-    temp_fp_quant_coeffs = temp_Inter_Y_coeffs[0];
-    temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
-    pbi->fp_quant_Inter_Y_round[0]= (0.5 + temp_fp_quant_round);
-    temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
-    pbi->fp_ZeroBinSize_Inter_Y[0]= (0.5 + temp_fp_ZeroBinSize);
-    temp_fp_quant_coeffs= 1.0 / temp_fp_quant_coeffs;
-    pbi->fp_quant_Inter_Y_coeffs[0]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
-    /* Inter U */
-    temp_fp_quant_coeffs = temp_Inter_U_coeffs[0];
-    temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
-    pbi->fp_quant_Inter_U_round[0]= (0.5 + temp_fp_quant_round);
-    temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
-    pbi->fp_ZeroBinSize_Inter_U[0]= (0.5 + temp_fp_ZeroBinSize);
-    temp_fp_quant_coeffs= 1.0 / temp_fp_quant_coeffs;
-    pbi->fp_quant_Inter_U_coeffs[0]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
-    /* Inter V */
-    temp_fp_quant_coeffs = temp_Inter_V_coeffs[0];
-    temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
-    pbi->fp_quant_Inter_V_round[0]= (0.5 + temp_fp_quant_round);
-    temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
-    pbi->fp_ZeroBinSize_Inter_V[0]= (0.5 + temp_fp_ZeroBinSize);
-    temp_fp_quant_coeffs= 1.0 / temp_fp_quant_coeffs;
-    pbi->fp_quant_Inter_V_coeffs[0]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
-
-    for ( i = 1; i < 64; i++ ){ /* the 63 remaining coefficients, same three steps per table */
-      /* Intra Y */
-      temp_fp_quant_coeffs = temp_Y_coeffs[i];
-      temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
-      pbi->fp_quant_Y_round[i]  = (0.5 + temp_fp_quant_round);
-      temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
-      pbi->fp_ZeroBinSize_Y[i]  = (0.5 + temp_fp_ZeroBinSize);
-      temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
-      pbi->fp_quant_Y_coeffs[i] = (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
-      /* Intra U */
-      temp_fp_quant_coeffs = temp_U_coeffs[i];
-      temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
-      pbi->fp_quant_U_round[i] = (0.5 + temp_fp_quant_round);
-      temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
-      pbi->fp_ZeroBinSize_U[i] = (0.5 + temp_fp_ZeroBinSize);
-      temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
-      pbi->fp_quant_U_coeffs[i]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
-      /* Intra V */
-      temp_fp_quant_coeffs = temp_V_coeffs[i];
-      temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
-      pbi->fp_quant_V_round[i] = (0.5 + temp_fp_quant_round);
-      temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
-      pbi->fp_ZeroBinSize_V[i] = (0.5 + temp_fp_ZeroBinSize);
-      temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
-      pbi->fp_quant_V_coeffs[i]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
-      /* Inter Y */
-      temp_fp_quant_coeffs = temp_Inter_Y_coeffs[i];
-      temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
-      pbi->fp_quant_Inter_Y_round[i]= (0.5 + temp_fp_quant_round);
-      temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
-      pbi->fp_ZeroBinSize_Inter_Y[i]= (0.5 + temp_fp_ZeroBinSize);
-      temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
-      pbi->fp_quant_Inter_Y_coeffs[i]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
-      /* Inter U */
-      temp_fp_quant_coeffs = temp_Inter_U_coeffs[i];
-      temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
-      pbi->fp_quant_Inter_U_round[i]= (0.5 + temp_fp_quant_round);
-      temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
-      pbi->fp_ZeroBinSize_Inter_U[i]= (0.5 + temp_fp_ZeroBinSize);
-      temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
-      pbi->fp_quant_Inter_U_coeffs[i]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
-      /* Inter V */
-      temp_fp_quant_coeffs = temp_Inter_V_coeffs[i];
-      temp_fp_quant_round = temp_fp_quant_coeffs * RoundingFactor;
-      pbi->fp_quant_Inter_V_round[i]= (0.5 + temp_fp_quant_round);
-      temp_fp_ZeroBinSize = temp_fp_quant_coeffs * ZBinFactor;
-      pbi->fp_ZeroBinSize_Inter_V[i]= (0.5 + temp_fp_ZeroBinSize);
-      temp_fp_quant_coeffs = 1.0 / temp_fp_quant_coeffs;
-      pbi->fp_quant_Inter_V_coeffs[i]= (0.5 + SHIFT16 * temp_fp_quant_coeffs);
-
-
-    }
-
-    pbi->fquant_coeffs = pbi->fp_quant_Y_coeffs; /* only the coefficient pointer is defaulted here; fquant_round and fquant_ZbSize are not set */
-
-}
-
-void select_quantiser(PB_INSTANCE *pbi, int type) {
-  /* select a quantiser according to what plane has to be coded in what
-   * mode. Could be extended to a more sophisticated scheme.
-   * Points pbi->fquant_coeffs, pbi->fquant_round and pbi->fquant_ZbSize
-   * at the table set that matches the BLOCK_* value in type. */
-
-  switch(type) {
-    case BLOCK_Y:
-      pbi->fquant_coeffs = pbi->fp_quant_Y_coeffs;
-      pbi->fquant_round = pbi->fp_quant_Y_round;
-      pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_Y;
-      break;
-    case BLOCK_U:
-      pbi->fquant_coeffs = pbi->fp_quant_U_coeffs;
-      pbi->fquant_round = pbi->fp_quant_U_round;
-      pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_U;
-      break;
-    case BLOCK_V:
-      pbi->fquant_coeffs = pbi->fp_quant_V_coeffs;
-      pbi->fquant_round = pbi->fp_quant_V_round;
-      pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_V;
-      break;
-    case BLOCK_INTER_Y:
-      pbi->fquant_coeffs = pbi->fp_quant_Inter_Y_coeffs;
-      pbi->fquant_round = pbi->fp_quant_Inter_Y_round;
-      pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_Inter_Y;
-      break;
-    case BLOCK_INTER_U:
-      pbi->fquant_coeffs = pbi->fp_quant_Inter_U_coeffs;
-      pbi->fquant_round = pbi->fp_quant_Inter_U_round;
-      pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_Inter_U;
-      break;
-    case BLOCK_INTER_V:
-      pbi->fquant_coeffs = pbi->fp_quant_Inter_V_coeffs;
-      pbi->fquant_round = pbi->fp_quant_Inter_V_round;
-      pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_Inter_V;
-      break;
-  } /* no default case: any other type leaves the current selection unchanged */
-}
-
-
-void quantize( PB_INSTANCE *pbi,
-               ogg_int16_t * DCT_block,
-               Q_LIST_ENTRY * quantized_list){ /* Quantises the 64 values
-     of DCT_block with the tables currently selected in pbi (fquant_round,
-     fquant_coeffs, fquant_ZbSize) and stores the results in quantized_list
-     at the positions given by pbi->zigzag_index. Values inside the zero
-     bin stay 0; the others are limited to the range -511..511. */
-  ogg_uint32_t  i;              /* Row index */
-  Q_LIST_ENTRY  val;            /* Quantised value. */
-
-  ogg_int32_t * FquantRoundPtr = pbi->fquant_round;
-  ogg_int32_t * FquantCoeffsPtr = pbi->fquant_coeffs;
-  ogg_int32_t * FquantZBinSizePtr = pbi->fquant_ZbSize;
-  ogg_int16_t * DCT_blockPtr = DCT_block;
-  ogg_uint32_t * ZigZagPtr = (ogg_uint32_t *)pbi->zigzag_index; /* NOTE(review): the cast assumes 32-bit zigzag_index entries -- confirm against the PB_INSTANCE declaration */
-  ogg_int32_t temp;
-
-  /* Set the quantized_list to default to 0 */
-  memset( quantized_list, 0, 64 * sizeof(Q_LIST_ENTRY) );
-
-  /* Note that we add half divisor to effect rounding on positive number */
-  for( i = 0; i < VFRAGPIXELS; i++) {
-
-    int col;
-    /* Iterate through columns */
-    for( col = 0; col < 8; col++) {
-      if ( DCT_blockPtr[col] >= FquantZBinSizePtr[col] ) { /* at or above the positive zero-bin edge */
-        temp = FquantCoeffsPtr[col] * ( DCT_blockPtr[col] + FquantRoundPtr[col] ) ;
-        val = (Q_LIST_ENTRY) (temp>>16);
-        quantized_list[ZigZagPtr[col]] = ( val > 511 ) ? 511 : val;
-      } else if ( DCT_blockPtr[col] <= -FquantZBinSizePtr[col] ) { /* at or below the negative zero-bin edge */
-        temp = FquantCoeffsPtr[col] *
-          ( DCT_blockPtr[col] - FquantRoundPtr[col] ) + MIN16;
-        val = (Q_LIST_ENTRY) (temp>>16);
-        quantized_list[ZigZagPtr[col]] = ( val < -511 ) ? -511 : val;
-      }
-    }
-
-    FquantRoundPtr += 8; /* advance all five pointers to the next row of 8 */
-    FquantCoeffsPtr += 8;
-    FquantZBinSizePtr += 8;
-    DCT_blockPtr += 8;
-    ZigZagPtr += 8;
-  }
-}
-
-static void init_dequantizer ( PB_INSTANCE *pbi,
-                               unsigned char  QIndex ){ /* Rebuilds
-     pbi->zigzag_index, then copies the six quant_tables rows for QIndex
-     into the pbi->dequant_*_coeffs arrays, entry i going to position
-     zigzag_index[i]. Leaves pbi->dequant_coeffs pointing at the Y table. */
-  int i, j;
-
-  ogg_uint16_t * InterY_coeffs;
-  ogg_uint16_t * InterU_coeffs;
-  ogg_uint16_t * InterV_coeffs;
-  ogg_uint16_t * Y_coeffs;
-  ogg_uint16_t * U_coeffs;
-  ogg_uint16_t * V_coeffs;
-
-  Y_coeffs = pbi->quant_tables[0][0][QIndex];
-  U_coeffs = pbi->quant_tables[0][1][QIndex];
-  V_coeffs = pbi->quant_tables[0][2][QIndex];
-  InterY_coeffs = pbi->quant_tables[1][0][QIndex];
-  InterU_coeffs = pbi->quant_tables[1][1][QIndex];
-  InterV_coeffs = pbi->quant_tables[1][2][QIndex];
-
-  /* invert the dequant index into the quant index
-     the dxer has a different order than the cxer. */
-  BuildZigZagIndex(pbi);
-
-  /* Reorder dequantisation coefficients into dct zigzag order. */
-  for ( i = 0; i < BLOCK_SIZE; i++ ) {
-    j = pbi->zigzag_index[i];
-    pbi->dequant_Y_coeffs[j] = Y_coeffs[i];
-  }
-  for ( i = 0; i < BLOCK_SIZE; i++ ) {
-    j = pbi->zigzag_index[i];
-    pbi->dequant_U_coeffs[j] = U_coeffs[i];
-  }
-  for ( i = 0; i < BLOCK_SIZE; i++ ) {
-    j = pbi->zigzag_index[i];
-    pbi->dequant_V_coeffs[j] = V_coeffs[i];
-  }
-  for ( i = 0; i < BLOCK_SIZE; i++ ){
-    j = pbi->zigzag_index[i];
-    pbi->dequant_InterY_coeffs[j] = InterY_coeffs[i];
-  }
-  for ( i = 0; i < BLOCK_SIZE; i++ ){
-    j = pbi->zigzag_index[i];
-    pbi->dequant_InterU_coeffs[j] = InterU_coeffs[i];
-  }
-  for ( i = 0; i < BLOCK_SIZE; i++ ){
-    j = pbi->zigzag_index[i];
-    pbi->dequant_InterV_coeffs[j] = InterV_coeffs[i];
-  }
-
-  pbi->dequant_coeffs = pbi->dequant_Y_coeffs; /* default selection: intra Y */
-}
-
-void UpdateQ( PB_INSTANCE *pbi, int NewQIndex ){ /* Sets the frame quality
-     index to NewQIndex (clamped to 0..Q_TABLE_SIZE-1), records the matching
-     ac_scale value and rebuilds the dequantiser tables for it. */
-  ogg_uint32_t qscale;
-
-  /* clamp to legal bounds */
-  if (NewQIndex >= Q_TABLE_SIZE) NewQIndex = Q_TABLE_SIZE - 1;
-  else if (NewQIndex < 0) NewQIndex = 0;
-
-  pbi->FrameQIndex = NewQIndex;
-
-  qscale = pbi->quant_info.ac_scale[NewQIndex];
-  pbi->ThisFrameQualityValue = qscale;
-
-  /* Re-initialise the dequantiser tables; the forward quantiser tables are
-     not touched here (only init_dequantizer is called). */
-  init_dequantizer ( pbi, (unsigned char) pbi->FrameQIndex );
-}
-
-void UpdateQC( CP_INSTANCE *cpi, ogg_uint32_t NewQ ){ /* Picks the frame
-     quality index for the requested scale value NewQ by searching ac_scale
-     from its last entry downwards, then rebuilds the quantiser and
-     dequantiser tables for that index. */
-  ogg_uint32_t qscale;
-  PB_INSTANCE *pbi = &cpi->pb;
-
-  /* Do bounds checking: clamp to the range spanned by the last and first
-     ac_scale entries. No float conversion happens here, and the clamped
-     qscale is not read again below -- the search compares against NewQ. */
-  qscale = NewQ;
-  if ( qscale < pbi->quant_info.ac_scale[Q_TABLE_SIZE-1] )
-    qscale = pbi->quant_info.ac_scale[Q_TABLE_SIZE-1];
-  else if ( qscale > pbi->quant_info.ac_scale[0] )
-    qscale = pbi->quant_info.ac_scale[0];
-
-  /* Set the inter/intra decision control variables. */
-  pbi->FrameQIndex = Q_TABLE_SIZE - 1;
-  while ((ogg_int32_t) pbi->FrameQIndex >= 0 ) {
-    if ( (pbi->FrameQIndex == 0) ||
-         ( pbi->quant_info.ac_scale[pbi->FrameQIndex] >= NewQ) )
-      break; /* stop at the highest index whose scale is at least NewQ, or at index 0 */
-    pbi->FrameQIndex --;
-  }
-
-  /* Re-initialise the Q tables for forward and reverse transforms. */
-  init_quantizer ( cpi, pbi->FrameQIndex );
-  init_dequantizer ( pbi,  pbi->FrameQIndex );
-}

+ 0 - 1447
Engine/lib/libtheora/lib/enc/encoder_toplevel.c

@@ -1,1447 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: encoder_toplevel.c 15383 2008-10-10 14:33:46Z xiphmont $
-
- ********************************************************************/
-
-#ifdef HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-#include <stdlib.h>
-#include <string.h>
-#include "toplevel_lookup.h"
-#include "../internal.h"
-#include "dsp.h"
-#include "codec_internal.h"
-
-#define A_TABLE_SIZE        29
-#define DF_CANDIDATE_WINDOW 5
-
-/*
- * th_quant_info for VP3
- */
-
-/*The default quantization parameters used by VP3.1.*/
-/*Range-size list shared by every entry of the per-plane table at the end of
-   TH_VP31_QUANT_INFO: one range, of size 63.*/
-static const int OC_VP31_RANGE_SIZES[1]={63};
-/*Two base matrices of 64 values each, written as 8 rows of 8.
-  Both entries hold the same values.
-  Referenced by the first entry of the first group in TH_VP31_QUANT_INFO;
-   presumably the intra luma (Y) matrices, going by the name.*/
-static const th_quant_base OC_VP31_BASES_INTRA_Y[2]={
-  {
-     16, 11, 10, 16, 24,  40, 51, 61,
-     12, 12, 14, 19, 26,  58, 60, 55,
-     14, 13, 16, 24, 40,  57, 69, 56,
-     14, 17, 22, 29, 51,  87, 80, 62,
-     18, 22, 37, 58, 68, 109,103, 77,
-     24, 35, 55, 64, 81, 104,113, 92,
-     49, 64, 78, 87,103, 121,120,101,
-     72, 92, 95, 98,112, 100,103, 99
-  },
-  {
-     16, 11, 10, 16, 24,  40, 51, 61,
-     12, 12, 14, 19, 26,  58, 60, 55,
-     14, 13, 16, 24, 40,  57, 69, 56,
-     14, 17, 22, 29, 51,  87, 80, 62,
-     18, 22, 37, 58, 68, 109,103, 77,
-     24, 35, 55, 64, 81, 104,113, 92,
-     49, 64, 78, 87,103, 121,120,101,
-     72, 92, 95, 98,112, 100,103, 99
-  }
-};
-/*Two base matrices of 64 values each, written as 8 rows of 8.
-  Both entries hold the same values.
-  Referenced by the second and third entries of the first group in
-   TH_VP31_QUANT_INFO; presumably the intra chroma matrices, going by the
-   name.*/
-static const th_quant_base OC_VP31_BASES_INTRA_C[2]={
-  {
-     17, 18, 24, 47, 99, 99, 99, 99,
-     18, 21, 26, 66, 99, 99, 99, 99,
-     24, 26, 56, 99, 99, 99, 99, 99,
-     47, 66, 99, 99, 99, 99, 99, 99,
-     99, 99, 99, 99, 99, 99, 99, 99,
-     99, 99, 99, 99, 99, 99, 99, 99,
-     99, 99, 99, 99, 99, 99, 99, 99,
-     99, 99, 99, 99, 99, 99, 99, 99
-  },
-  {
-     17, 18, 24, 47, 99, 99, 99, 99,
-     18, 21, 26, 66, 99, 99, 99, 99,
-     24, 26, 56, 99, 99, 99, 99, 99,
-     47, 66, 99, 99, 99, 99, 99, 99,
-     99, 99, 99, 99, 99, 99, 99, 99,
-     99, 99, 99, 99, 99, 99, 99, 99,
-     99, 99, 99, 99, 99, 99, 99, 99,
-     99, 99, 99, 99, 99, 99, 99, 99
-  }
-};
-/*Two base matrices of 64 values each, written as 8 rows of 8.
-  Both entries hold the same values.
-  Referenced by all three entries of the second group in
-   TH_VP31_QUANT_INFO; presumably the inter matrices shared by every plane,
-   going by the name.*/
-static const th_quant_base OC_VP31_BASES_INTER[2]={
-  {
-     16, 16, 16, 20, 24, 28, 32, 40,
-     16, 16, 20, 24, 28, 32, 40, 48,
-     16, 20, 24, 28, 32, 40, 48, 64,
-     20, 24, 28, 32, 40, 48, 64, 64,
-     24, 28, 32, 40, 48, 64, 64, 64,
-     28, 32, 40, 48, 64, 64, 64, 96,
-     32, 40, 48, 64, 64, 64, 96,128,
-     40, 48, 64, 64, 64, 96,128,128
-  },
-  {
-     16, 16, 16, 20, 24, 28, 32, 40,
-     16, 16, 20, 24, 28, 32, 40, 48,
-     16, 20, 24, 28, 32, 40, 48, 64,
-     20, 24, 28, 32, 40, 48, 64, 64,
-     24, 28, 32, 40, 48, 64, 64, 64,
-     28, 32, 40, 48, 64, 64, 64, 96,
-     32, 40, 48, 64, 64, 64, 96,128,
-     40, 48, 64, 64, 64, 96,128,128
-  }
-};
-
-/*The complete default quantization parameter set.
-  It consists of three 64-entry tables followed by two groups of three
-   {count, range sizes, base matrices} entries.
-  NOTE(review): the field names are not visible here; per the public
-   th_quant_info declaration the three tables are presumably dc_scale,
-   ac_scale and loop_filter_limits, and the groups presumably intra then
-   inter, each ordered by plane - confirm against theora/codec.h.*/
-const th_quant_info TH_VP31_QUANT_INFO={
-  {
-    220,200,190,180,170,170,160,160,
-    150,150,140,140,130,130,120,120,
-    110,110,100,100, 90, 90, 90, 80,
-     80, 80, 70, 70, 70, 60, 60, 60,
-     60, 50, 50, 50, 50, 40, 40, 40,
-     40, 40, 30, 30, 30, 30, 30, 30,
-     30, 20, 20, 20, 20, 20, 20, 20,
-     20, 10, 10, 10, 10, 10, 10, 10
-  },
-  {
-    500,450,400,370,340,310,285,265,
-    245,225,210,195,185,180,170,160,
-    150,145,135,130,125,115,110,107,
-    100, 96, 93, 89, 85, 82, 75, 74,
-     70, 68, 64, 60, 57, 56, 52, 50,
-     49, 45, 44, 43, 40, 38, 37, 35,
-     33, 32, 30, 29, 28, 25, 24, 22,
-     21, 19, 18, 17, 15, 13, 12, 10
-  },
-  {
-    30,25,20,20,15,15,14,14,
-    13,13,12,12,11,11,10,10,
-     9, 9, 8, 8, 7, 7, 7, 7,
-     6, 6, 6, 6, 5, 5, 5, 5,
-     4, 4, 4, 4, 3, 3, 3, 3,
-     2, 2, 2, 2, 2, 2, 2, 2,
-     0, 0, 0, 0, 0, 0, 0, 0,
-     0, 0, 0, 0, 0, 0, 0, 0
-  },
-  {
-    {
-      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_Y},
-      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_C},
-      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_C}
-    },
-    {
-      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER},
-      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER},
-      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER}
-    }
-  }
-};
-
-
-/* Release every per-fragment and per-macroblock work buffer owned by cpi.
-
-   Each pointer is reset to NULL immediately after its buffer is released,
-   so calling this function again (directly, or through
-   EInitFragmentInfo()) can never free the same buffer twice.  This covers
-   all sixteen buffers, including PartiallyCodedFlags,
-   PartiallyCodedMbPatterns and UncodedMbFlags. */
-static void EClearFragmentInfo(CP_INSTANCE * cpi){
-  if(cpi->extra_fragments)
-    _ogg_free(cpi->extra_fragments);
-  cpi->extra_fragments = NULL;
-
-  if(cpi->FragmentLastQ)
-    _ogg_free(cpi->FragmentLastQ);
-  cpi->FragmentLastQ = NULL;
-
-  if(cpi->FragTokens)
-    _ogg_free(cpi->FragTokens);
-  cpi->FragTokens = NULL;
-
-  if(cpi->FragTokenCounts)
-    _ogg_free(cpi->FragTokenCounts);
-  cpi->FragTokenCounts = NULL;
-
-  if(cpi->RunHuffIndices)
-    _ogg_free(cpi->RunHuffIndices);
-  cpi->RunHuffIndices = NULL;
-
-  if(cpi->LastCodedErrorScore)
-    _ogg_free(cpi->LastCodedErrorScore);
-  cpi->LastCodedErrorScore = NULL;
-
-  if(cpi->ModeList)
-    _ogg_free(cpi->ModeList);
-  cpi->ModeList = NULL;
-
-  if(cpi->MVList)
-    _ogg_free(cpi->MVList);
-  cpi->MVList = NULL;
-
-  if(cpi->DCT_codes)
-    _ogg_free(cpi->DCT_codes);
-  cpi->DCT_codes = NULL;
-
-  if(cpi->DCTDataBuffer)
-    _ogg_free(cpi->DCTDataBuffer);
-  cpi->DCTDataBuffer = NULL;
-
-  if(cpi->quantized_list)
-    _ogg_free(cpi->quantized_list);
-  cpi->quantized_list = NULL;
-
-  if(cpi->OriginalDC)
-    _ogg_free(cpi->OriginalDC);
-  cpi->OriginalDC = NULL;
-
-  if(cpi->PartiallyCodedFlags)
-    _ogg_free(cpi->PartiallyCodedFlags);
-  cpi->PartiallyCodedFlags = NULL;
-
-  if(cpi->PartiallyCodedMbPatterns)
-    _ogg_free(cpi->PartiallyCodedMbPatterns);
-  cpi->PartiallyCodedMbPatterns = NULL;
-
-  if(cpi->UncodedMbFlags)
-    _ogg_free(cpi->UncodedMbFlags);
-  cpi->UncodedMbFlags = NULL;
-
-  if(cpi->BlockCodedFlags)
-    _ogg_free(cpi->BlockCodedFlags);
-  cpi->BlockCodedFlags = NULL;
-}
-
-/* (Re)allocate the per-fragment and per-macroblock work buffers of cpi.
-   Buffers left over from an earlier initialisation are released first
-   through EClearFragmentInfo().
-
-   The fragment arrays get cpi->pb.UnitFragments elements, the three DCT
-   scratch buffers get 64 elements and the macroblock arrays get
-   cpi->pb.MacroBlocks elements.
-
-   The _ogg_malloc() results are deliberately left unchecked.  The
-   rationale recorded by the original authors: a malloc that only reserves
-   virtual pages reports exhaustion as a SEGV when a page is first touched,
-   not as a null return, so checking the return value would not by itself
-   make the encoder safe against running out of memory. */
-static void EInitFragmentInfo(CP_INSTANCE * cpi){
-  size_t nfrags;
-  size_t nmbs;
-
-  /* Release anything left over from an earlier initialisation. */
-  EClearFragmentInfo(cpi);
-
-  nfrags = cpi->pb.UnitFragments;
-  nmbs = cpi->pb.MacroBlocks;
-
-  /* Arrays with one element per fragment. */
-  cpi->extra_fragments = _ogg_malloc(nfrags*sizeof(unsigned char));
-  cpi->FragmentLastQ = _ogg_malloc(nfrags*sizeof(*cpi->FragmentLastQ));
-  cpi->FragTokens = _ogg_malloc(nfrags*sizeof(*cpi->FragTokens));
-  cpi->OriginalDC = _ogg_malloc(nfrags*sizeof(*cpi->OriginalDC));
-  cpi->FragTokenCounts = _ogg_malloc(nfrags*sizeof(*cpi->FragTokenCounts));
-  cpi->RunHuffIndices = _ogg_malloc(nfrags*sizeof(*cpi->RunHuffIndices));
-  cpi->LastCodedErrorScore =
-    _ogg_malloc(nfrags*sizeof(*cpi->LastCodedErrorScore));
-  cpi->BlockCodedFlags = _ogg_malloc(nfrags*sizeof(*cpi->BlockCodedFlags));
-  cpi->ModeList = _ogg_malloc(nfrags*sizeof(*cpi->ModeList));
-  cpi->MVList = _ogg_malloc(nfrags*sizeof(*cpi->MVList));
-
-  /* Scratch buffers of 64 elements each. */
-  cpi->DCT_codes = _ogg_malloc(64*sizeof(*cpi->DCT_codes));
-  cpi->DCTDataBuffer = _ogg_malloc(64*sizeof(*cpi->DCTDataBuffer));
-  cpi->quantized_list = _ogg_malloc(64*sizeof(*cpi->quantized_list));
-
-  /* Arrays with one element per macroblock. */
-  cpi->PartiallyCodedFlags =
-    _ogg_malloc(nmbs*sizeof(*cpi->PartiallyCodedFlags));
-  cpi->PartiallyCodedMbPatterns =
-    _ogg_malloc(nmbs*sizeof(*cpi->PartiallyCodedMbPatterns));
-  cpi->UncodedMbFlags = _ogg_malloc(nmbs*sizeof(*cpi->UncodedMbFlags));
-}
-
-/* Release the frame-sized buffers owned by cpi (the conversion buffer, the
-   two yuv buffers and the four optimised token lists) and mark each of
-   them as unallocated. */
-static void EClearFrameInfo(CP_INSTANCE * cpi) {
-  /* Free whichever buffers are currently allocated... */
-  if(cpi->ConvDestBuffer) _ogg_free(cpi->ConvDestBuffer);
-  if(cpi->yuv0ptr) _ogg_free(cpi->yuv0ptr);
-  if(cpi->yuv1ptr) _ogg_free(cpi->yuv1ptr);
-  if(cpi->OptimisedTokenListEb) _ogg_free(cpi->OptimisedTokenListEb);
-  if(cpi->OptimisedTokenList) _ogg_free(cpi->OptimisedTokenList);
-  if(cpi->OptimisedTokenListHi) _ogg_free(cpi->OptimisedTokenListHi);
-  if(cpi->OptimisedTokenListPl) _ogg_free(cpi->OptimisedTokenListPl);
-
-  /* ...then clear every pointer so nothing is left dangling. */
-  cpi->ConvDestBuffer = 0;
-  cpi->yuv0ptr = 0;
-  cpi->yuv1ptr = 0;
-  cpi->OptimisedTokenListEb = 0;
-  cpi->OptimisedTokenList = 0;
-  cpi->OptimisedTokenListHi = 0;
-  cpi->OptimisedTokenListPl = 0;
-}
-
-/* (Re)allocate the frame-sized buffers of cpi.  Each buffer receives
-   ReconYPlaneSize + 2*ReconUVPlaneSize elements of its own element type.
-   Existing buffers are released first through EClearFrameInfo().  The
-   allocation results are not checked. */
-static void EInitFrameInfo(CP_INSTANCE * cpi){
-  int frame_elems = cpi->pb.ReconYPlaneSize + 2 * cpi->pb.ReconUVPlaneSize;
-
-  /* Drop the previous set of buffers, if any. */
-  EClearFrameInfo(cpi);
-
-  /* Conversion buffer and the two yuv frame buffers. */
-  cpi->ConvDestBuffer = _ogg_malloc(frame_elems*sizeof(*cpi->ConvDestBuffer));
-  cpi->yuv0ptr = _ogg_malloc(frame_elems*sizeof(*cpi->yuv0ptr));
-  cpi->yuv1ptr = _ogg_malloc(frame_elems*sizeof(*cpi->yuv1ptr));
-
-  /* The four optimised token lists. */
-  cpi->OptimisedTokenListEb =
-    _ogg_malloc(frame_elems*sizeof(*cpi->OptimisedTokenListEb));
-  cpi->OptimisedTokenList =
-    _ogg_malloc(frame_elems*sizeof(*cpi->OptimisedTokenList));
-  cpi->OptimisedTokenListHi =
-    _ogg_malloc(frame_elems*sizeof(*cpi->OptimisedTokenListHi));
-  cpi->OptimisedTokenListPl =
-    _ogg_malloc(frame_elems*sizeof(*cpi->OptimisedTokenListPl));
-}
-
-/* Prepare cpi for coding the current frame as a key frame: mirror yuv1ptr
-   into yuv0ptr, flag every fragment in both fragment maps and set the
-   frame type to KEY_FRAME. */
-static void SetupKeyFrame(CP_INSTANCE *cpi) {
-  PB_INSTANCE *pbi = &cpi->pb;
-
-  /* The "last frame" buffer must hold this frame's data too. */
-  memcpy( cpi->yuv0ptr, cpi->yuv1ptr,
-          pbi->ReconYPlaneSize + 2 * pbi->ReconUVPlaneSize );
-
-  /* Set the flag of every fragment in display_fragments and
-     extra_fragments. */
-  memset( pbi->display_fragments, 1, pbi->UnitFragments );
-  memset( cpi->extra_fragments, 1, pbi->UnitFragments );
-
-  pbi->FrameType = KEY_FRAME;
-}
-
-/* Fold the key frame that was just written to cpi->oggbuffer into the key
-   frame history and recompute cpi->frame_target_rate from it.
-
-   Steps: the history arrays PriorKeyFrameSize[] and PriorKeyFrameDistance[]
-   are shifted down by one slot and the newest slot receives this key
-   frame's size and cpi->LastKeyFrame; weighted sums over the history give
-   an average key frame size and spacing; frame_target_rate is derived from
-   those averages and is never allowed below a third of
-   TargetBandwidth/OutputFrameRate.  Finally LastKeyFrame is reset to 1 and
-   LastKeyFrameSize is set to this key frame's size.
-
-   NOTE(review): the divisions by cpi->KeyFrameCount, TotalWeight,
-   AvKeyFrameFrequency and OutputFrameRate have no zero guard here -
-   confirm the callers guarantee non-zero values. */
-static void AdjustKeyFrameContext(CP_INSTANCE *cpi) {
-  ogg_uint32_t i;
-  /* Both averages start from the whole-stream figures, taken before
-     TotKeyFrameBytes is updated below. */
-  ogg_uint32_t  AvKeyFrameFrequency =
-    (ogg_uint32_t) (cpi->CurrentFrame / cpi->KeyFrameCount);
-  ogg_uint32_t  AvKeyFrameBytes =
-    (ogg_uint32_t) (cpi->TotKeyFrameBytes / cpi->KeyFrameCount);
-  ogg_uint32_t TotalWeight=0;
-  ogg_int32_t AvKeyFramesPerSecond;
-  ogg_int32_t MinFrameTargetRate;
-
-  /* Add this key frame's size to the running total of key frame bytes. */
-  cpi->TotKeyFrameBytes += oggpackB_bytes(cpi->oggbuffer);
-
-  /* reset keyframe context and calculate weighted average of last
-     KEY_FRAME_CONTEXT keyframes */
-  for( i = 0 ; i < KEY_FRAME_CONTEXT ; i ++ ) {
-    if ( i < KEY_FRAME_CONTEXT -1) {
-      cpi->PriorKeyFrameSize[i] = cpi->PriorKeyFrameSize[i+1];
-      cpi->PriorKeyFrameDistance[i] = cpi->PriorKeyFrameDistance[i+1];
-    } else {
-      cpi->PriorKeyFrameSize[KEY_FRAME_CONTEXT - 1] =
-        oggpackB_bytes(cpi->oggbuffer);
-      cpi->PriorKeyFrameDistance[KEY_FRAME_CONTEXT - 1] =
-        cpi->LastKeyFrame;
-    }
-
-    AvKeyFrameBytes += PriorKeyFrameWeight[i] *
-      cpi->PriorKeyFrameSize[i];
-    AvKeyFrameFrequency += PriorKeyFrameWeight[i] *
-      cpi->PriorKeyFrameDistance[i];
-    TotalWeight += PriorKeyFrameWeight[i];
-  }
-  /* NOTE(review): the weighted sums were added on top of the whole-stream
-     averages, yet the divisor is the sum of the weights only - confirm
-     this is intended. */
-  AvKeyFrameBytes /= TotalWeight;
-  AvKeyFrameFrequency /= TotalWeight;
-  /* Key frames per second, scaled by 100. */
-  AvKeyFramesPerSecond =  100 * cpi->Configuration.OutputFrameRate /
-    AvKeyFrameFrequency ;
-
-  /* Calculate a new target rate per frame allowing for average key
-     frame frequency over newest frames . */
-  /* Every term of the test and of the quotient carries the same factor of
-     100 as AvKeyFramesPerSecond.  The second half of the condition keeps
-     the divisor below from being zero. */
-  if ( 100 * cpi->Configuration.TargetBandwidth >
-       AvKeyFrameBytes * AvKeyFramesPerSecond &&
-       (100 * cpi->Configuration.OutputFrameRate - AvKeyFramesPerSecond )){
-    cpi->frame_target_rate =
-      (ogg_int32_t)(100* cpi->Configuration.TargetBandwidth -
-                    AvKeyFrameBytes * AvKeyFramesPerSecond ) /
-      ( (100 * cpi->Configuration.OutputFrameRate - AvKeyFramesPerSecond ) );
-  } else {
-    /* don't let this number get too small!!! */
-    cpi->frame_target_rate = 1;
-  }
-
-  /* minimum allowable frame_target_rate */
-  MinFrameTargetRate = (cpi->Configuration.TargetBandwidth /
-                        cpi->Configuration.OutputFrameRate) / 3;
-
-  if(cpi->frame_target_rate < MinFrameTargetRate ) {
-    cpi->frame_target_rate = MinFrameTargetRate;
-  }
-
-  /* Restart the frames-since-key-frame count and remember this key
-     frame's size. */
-  cpi->LastKeyFrame = 1;
-  cpi->LastKeyFrameSize=oggpackB_bytes(cpi->oggbuffer);
-
-}
-
-/* Write the current frame into cpi->oggbuffer and update the rate control
-   state from the number of bytes produced.
-
-   Output: the bit packer is reset, a single 0 bit is written, then
-   WriteFrameHeader(), CopyBackExtraFrags() and EncodeData() are called in
-   that order.
-
-   Rate control updated afterwards: DropFrameTriggerBytes,
-   DropFrameCandidate, BpbCorrectionFactor (delta frames only), then either
-   the key frame context (key frames) or CarryOver (delta frames), and
-   finally TotalByteCount. */
-static void UpdateFrame(CP_INSTANCE *cpi){
-
-  double CorrectionFactor;
-
-  /* Reset the DC predictors. */
-  cpi->pb.LastIntraDC = 0;
-  cpi->pb.InvLastIntraDC = 0;
-  cpi->pb.LastInterDC = 0;
-  cpi->pb.InvLastInterDC = 0;
-
-  /* Initialise bit packing mechanism. */
-  oggpackB_reset(cpi->oggbuffer);
-
-  /* mark as video frame */
-  oggpackB_write(cpi->oggbuffer,0,1);
-
-  /* Write out the frame header information including size. */
-  WriteFrameHeader(cpi);
-
-  /* Copy back any extra frags that are to be updated by the codec
-     as part of the background cleanup task */
-  CopyBackExtraFrags(cpi);
-
-  /* Encode the data.  */
-  EncodeData(cpi);
-
-  /* Adjust drop frame trigger. */
-  if ( cpi->pb.FrameType != KEY_FRAME ) {
-    /* Apply decay factor then add in the last frame size. */
-    cpi->DropFrameTriggerBytes =
-      ((cpi->DropFrameTriggerBytes * (DF_CANDIDATE_WINDOW-1)) /
-       DF_CANDIDATE_WINDOW) + oggpackB_bytes(cpi->oggbuffer);
-  }else{
-    /* Increase cpi->DropFrameTriggerBytes a little. Just after a key
-       frame may actually be a good time to drop a frame. */
-    cpi->DropFrameTriggerBytes =
-      (cpi->DropFrameTriggerBytes * DF_CANDIDATE_WINDOW) /
-      (DF_CANDIDATE_WINDOW-1);
-  }
-
-  /* Test for overshoot which may require a dropped frame next time
-     around.  If we are already in a drop frame condition but the
-     previous frame was not dropped then the threshold for continuing
-     to allow dropped frames is reduced. */
-  if ( cpi->DropFrameCandidate ) {
-    /* Already a candidate: stay one above DF_CANDIDATE_WINDOW+1 frames'
-       worth of target bytes. */
-    if ( cpi->DropFrameTriggerBytes >
-         (cpi->frame_target_rate * (DF_CANDIDATE_WINDOW+1)) )
-      cpi->DropFrameCandidate = 1;
-    else
-      cpi->DropFrameCandidate = 0;
-  } else {
-    /* Not yet a candidate: become one above DF_CANDIDATE_WINDOW*2-2
-       frames' worth of target bytes. */
-    if ( cpi->DropFrameTriggerBytes >
-         (cpi->frame_target_rate * ((DF_CANDIDATE_WINDOW*2)-2)) )
-      cpi->DropFrameCandidate = 1;
-    else
-      cpi->DropFrameCandidate = 0;
-  }
-
-  /* Update the BpbCorrectionFactor variable according to whether or
-     not we were close enough with our selection of DCT quantiser.  */
-  if ( cpi->pb.FrameType != KEY_FRAME ) {
-    /* Work out a size correction factor. */
-    /* NOTE(review): ThisFrameTargetBytes is the divisor and is not checked
-       against zero here. */
-    CorrectionFactor = (double)oggpackB_bytes(cpi->oggbuffer) /
-      (double)cpi->ThisFrameTargetBytes;
-
-    /* More than 5% over target: apply half of the overshoot, limited to a
-       factor of 1.5 per frame. */
-    if ( (CorrectionFactor > 1.05) &&
-         (cpi->pb.ThisFrameQualityValue <
-          cpi->pb.QThreshTable[cpi->Configuration.ActiveMaxQ]) ) {
-      CorrectionFactor = 1.0 + ((CorrectionFactor - 1.0)/2);
-      if ( CorrectionFactor > 1.5 )
-        cpi->BpbCorrectionFactor *= 1.5;
-      else
-        cpi->BpbCorrectionFactor *= CorrectionFactor;
-
-      /* Keep BpbCorrectionFactor within limits */
-      if ( cpi->BpbCorrectionFactor > MAX_BPB_FACTOR )
-        cpi->BpbCorrectionFactor = MAX_BPB_FACTOR;
-    /* More than 5% under target: apply half of the undershoot, limited to
-       a factor of 0.75 per frame. */
-    } else if ( (CorrectionFactor < 0.95) &&
-                (cpi->pb.ThisFrameQualityValue > VERY_BEST_Q) ){
-      CorrectionFactor = 1.0 - ((1.0 - CorrectionFactor)/2);
-      if ( CorrectionFactor < 0.75 )
-        cpi->BpbCorrectionFactor *= 0.75;
-      else
-        cpi->BpbCorrectionFactor *= CorrectionFactor;
-
-      /* Keep BpbCorrectionFactor within limits */
-      if ( cpi->BpbCorrectionFactor < MIN_BPB_FACTOR )
-        cpi->BpbCorrectionFactor = MIN_BPB_FACTOR;
-    }
-  }
-
-  /* Adjust carry over and or key frame context. */
-  if ( cpi->pb.FrameType == KEY_FRAME ) {
-    /* Adjust the key frame context (done for every key frame; no size
-       test is applied here). */
-    AdjustKeyFrameContext(cpi);
-  } else {
-    /* Update the frame carry over */
-    cpi->CarryOver += ((ogg_int32_t)cpi->frame_target_rate -
-                       (ogg_int32_t)oggpackB_bytes(cpi->oggbuffer));
-  }
-  cpi->TotalByteCount += oggpackB_bytes(cpi->oggbuffer);
-}
-
-/* Code the first frame of the stream as a key frame.
-
-   Seeds the key frame history and counters, derives the initial per-frame
-   byte target from TargetBandwidth, KeyFrameDataTarget, OutputFrameRate and
-   keyframe_frequency, picks a quantiser with RegulateQ()/UpdateQC(), then
-   codes the frame with PickIntra() and UpdateFrame().  CarryOver is zeroed
-   once the frame has been written. */
-static void CompressFirstFrame(CP_INSTANCE *cpi) {
-  ogg_uint32_t i;
-
-  /* set up context of key frame sizes and distances for more local
-     datarate control */
-  for( i = 0 ; i < KEY_FRAME_CONTEXT ; i ++ ) {
-    cpi->PriorKeyFrameSize[i] = cpi->Configuration.KeyFrameDataTarget;
-    cpi->PriorKeyFrameDistance[i] = cpi->pb.info.keyframe_frequency_force;
-  }
-
-  /* Keep track of the total number of Key Frames Coded. */
-  cpi->KeyFrameCount = 1;
-  cpi->LastKeyFrame = 1;
-  cpi->TotKeyFrameBytes = 0;
-
-  /* A key frame is not a dropped frame, therefore reset the count of
-     consecutive dropped frames. */
-  cpi->DropCount = 0;
-
-  SetupKeyFrame(cpi);
-
-  /* Calculate a new target rate per frame allowing for average key
-     frame frequency and size thus far. */
-  /* NOTE(review): keyframe_frequency and OutputFrameRate are used as
-     divisors without a zero check in this function. */
-  if ( cpi->Configuration.TargetBandwidth >
-       ((cpi->Configuration.KeyFrameDataTarget *
-         cpi->Configuration.OutputFrameRate)/
-        cpi->pb.info.keyframe_frequency) ) {
-
-    cpi->frame_target_rate =
-      (ogg_int32_t)((cpi->Configuration.TargetBandwidth -
-                     ((cpi->Configuration.KeyFrameDataTarget *
-                       cpi->Configuration.OutputFrameRate)/
-                      cpi->pb.info.keyframe_frequency)) /
-                    cpi->Configuration.OutputFrameRate);
-  }else
-    cpi->frame_target_rate = 1;
-
-  /* Set baseline frame target rate. */
-  cpi->BaseLineFrameTargetRate = cpi->frame_target_rate;
-
-  /* A key frame is not a dropped frame, therefore reset the count of
-     consecutive dropped frames.  (DropCount was already reset above.) */
-  cpi->DropCount = 0;
-
-  /* Initialise drop frame trigger to 5 frames worth of data. */
-  cpi->DropFrameTriggerBytes = cpi->frame_target_rate * DF_CANDIDATE_WINDOW;
-
-  /* Set a target size for this key frame based upon the baseline
-     target and frequency */
-  cpi->ThisFrameTargetBytes = cpi->Configuration.KeyFrameDataTarget;
-
-  /* Get a DCT quantizer level for the key frame. */
-  cpi->MotionScore = cpi->pb.UnitFragments;
-
-  RegulateQ(cpi, cpi->pb.UnitFragments);
-
-  cpi->pb.LastFrameQualityValue = cpi->pb.ThisFrameQualityValue;
-
-  /* Initialise quantizer. */
-  UpdateQC(cpi, cpi->pb.ThisFrameQualityValue );
-
-  /* Record the quality value used for every fragment of the first
-     frame. */
-  for ( i = 0; i < cpi->pb.UnitFragments; i ++ )
-    cpi->FragmentLastQ[i] = cpi->pb.ThisFrameQualityValue;
-
-  /* Compress and output the first frame. */
-  PickIntra( cpi,
-             cpi->pb.YSBRows, cpi->pb.YSBCols);
-  UpdateFrame(cpi);
-
-  /* Initialise the carry over rate targeting variables. */
-  cpi->CarryOver = 0;
-
-}
-
-/* Code the current frame as a key frame (any key frame after the first).
-
-   CarryOver is recomputed from the stream totals, the key frame counter is
-   advanced, a byte target is derived for this frame and limited to
-   KeyFrameDataTarget, a quantiser is picked with RegulateQ()/UpdateQC(),
-   and the frame is coded with PickIntra() and UpdateFrame(). */
-static void CompressKeyFrame(CP_INSTANCE *cpi){
-  ogg_uint32_t  i;
-
-  /* Before we compress reset the carry over to the actual frame carry over */
-  /* That is: TargetBandwidth * CurrentFrame / OutputFrameRate, less the
-     bytes written so far. */
-  cpi->CarryOver = cpi->Configuration.TargetBandwidth * cpi->CurrentFrame  /
-    cpi->Configuration.OutputFrameRate - cpi->TotalByteCount;
-
-  /* Keep track of the total number of Key Frames Coded */
-  cpi->KeyFrameCount += 1;
-
-  /* A key frame is not a dropped frame, therefore reset the count of
-     consecutive dropped frames. */
-  cpi->DropCount = 0;
-
-  SetupKeyFrame(cpi);
-
-  /* set a target size for this frame */
-  /* The target moves from frame_target_rate towards KeyFrameDataTarget in
-     proportion to LastKeyFrame / keyframe_frequency_force.
-     NOTE(review): keyframe_frequency_force is a divisor with no zero check
-     here. */
-  cpi->ThisFrameTargetBytes = (ogg_int32_t) cpi->frame_target_rate +
-    ( (cpi->Configuration.KeyFrameDataTarget - cpi->frame_target_rate) *
-      cpi->LastKeyFrame / cpi->pb.info.keyframe_frequency_force );
-
-  if ( cpi->ThisFrameTargetBytes > cpi->Configuration.KeyFrameDataTarget )
-    cpi->ThisFrameTargetBytes = cpi->Configuration.KeyFrameDataTarget;
-
-  /* Get a DCT quantizer level for the key frame. */
-  cpi->MotionScore = cpi->pb.UnitFragments;
-
-  RegulateQ(cpi, cpi->pb.UnitFragments);
-
-  cpi->pb.LastFrameQualityValue = cpi->pb.ThisFrameQualityValue;
-
-  /* Initialise DCT tables. */
-  UpdateQC(cpi, cpi->pb.ThisFrameQualityValue );
-
-  /* Record the quality value used for every fragment of this key
-     frame. */
-  for ( i = 0; i < cpi->pb.UnitFragments; i ++ )
-    cpi->FragmentLastQ[i] = cpi->pb.ThisFrameQualityValue;
-
-
-  /* Compress and output the key frame. */
-  PickIntra( cpi,
-             cpi->pb.YSBRows, cpi->pb.YSBCols);
-  UpdateFrame(cpi);
-
-}
-
-/* Code the current frame as a delta frame, unless rate control decides to
-   drop it or mode selection decides it should be a key frame after all.
-
-   Outline:
-    1. Reset the per-fragment coding mode, motion vectors and update maps.
-    2. Derive ActiveMaxQ and the QTargetModifier[] table from CarryOver.
-    3. Decide whether to drop the frame; otherwise scale
-       ThisFrameTargetBytes down when CarryOver is negative.
-    4. If not dropped: analyse the frame, pick a quantiser, add residue
-       blocks, pick modes, then either hand over to CompressKeyFrame() or
-       write the frame with UpdateFrame().
-    5. If dropped: UpdateFrame() is still called so that a placeholder is
-       written. */
-static void CompressFrame( CP_INSTANCE *cpi) {
-  ogg_int32_t min_blocks_per_frame;
-  ogg_uint32_t  i;
-  int DropFrame = 0;
-  ogg_uint32_t  ResidueBlocksAdded=0;
-  ogg_uint32_t  KFIndicator = 0;
-
-  double QModStep;
-  double QModifier = 1.0;
-
-  /* Clear down the macro block level mode and MV arrays. */
-  for ( i = 0; i < cpi->pb.UnitFragments; i++ ) {
-    cpi->pb.FragCodingMethod[i] = CODE_INTER_NO_MV;  /* Default coding mode */
-    cpi->pb.FragMVect[i].x = 0;
-    cpi->pb.FragMVect[i].y = 0;
-  }
-
-  /* Default to delta frames. */
-  cpi->pb.FrameType = DELTA_FRAME;
-
-  /* Clear down the difference arrays for the current frame. */
-  memset( cpi->pb.display_fragments, 0, cpi->pb.UnitFragments );
-  memset( cpi->extra_fragments, 0, cpi->pb.UnitFragments );
-
-  /* Calculate the target bytes for this frame. */
-  cpi->ThisFrameTargetBytes = cpi->frame_target_rate;
-
-  /* Correct target to try and compensate for any overall rate error
-     that is developing */
-
-  /* Set the max allowed Q for this frame based upon carry over
-     history.  First set baseline worst Q for this frame */
-  cpi->Configuration.ActiveMaxQ = cpi->Configuration.MaxQ + 10;
-  if ( cpi->Configuration.ActiveMaxQ >= Q_TABLE_SIZE )
-    cpi->Configuration.ActiveMaxQ = Q_TABLE_SIZE - 1;
-
-  /* Make a further adjustment based upon the carry over and recent
-   history..  cpi->Configuration.ActiveMaxQ reduced by 1 for each 1/2
-   seconds worth of -ve carry over up to a limit of 6.  Also
-   cpi->Configuration.ActiveMaxQ reduced if frame is a
-   "DropFrameCandidate".  Remember that if we are behind the bit
-   target carry over is -ve. */
-  if ( cpi->CarryOver < 0 ) {
-    if ( cpi->DropFrameCandidate ) {
-      cpi->Configuration.ActiveMaxQ -= 4;
-    }
-
-    if ( cpi->CarryOver <
-         -((ogg_int32_t)cpi->Configuration.TargetBandwidth*3) )
-      cpi->Configuration.ActiveMaxQ -= 6;
-    else
-      cpi->Configuration.ActiveMaxQ +=
-        (ogg_int32_t) ((cpi->CarryOver*2) /
-                       (ogg_int32_t)cpi->Configuration.TargetBandwidth);
-
-    /* Check that we have not dropped quality too far */
-    if ( cpi->Configuration.ActiveMaxQ < cpi->Configuration.MaxQ )
-      cpi->Configuration.ActiveMaxQ = cpi->Configuration.MaxQ;
-  }
-
-  /* Calculate the Q Modifier step size required to cause a step down
-     from full target bandwidth to 40% of target between max Q and
-     best Q */
-  /* NOTE(review): ActiveMaxQ may equal Q_TABLE_SIZE-1 (see the clamp
-     above), which makes the divisor 0.0 - confirm that is acceptable. */
-  QModStep = 0.5 / (double)((Q_TABLE_SIZE - 1) -
-                            cpi->Configuration.ActiveMaxQ);
-
-  /* Set up the cpi->QTargetModifier[] table. */
-  /* Entries below ActiveMaxQ are 1.0; from ActiveMaxQ upwards each entry
-     is QModStep smaller than the one before. */
-  for ( i = 0; i < cpi->Configuration.ActiveMaxQ; i++ ) {
-    cpi->QTargetModifier[i] = QModifier;
-  }
-  for ( i = cpi->Configuration.ActiveMaxQ; i < Q_TABLE_SIZE; i++ ) {
-    cpi->QTargetModifier[i] = QModifier;
-    QModifier -= QModStep;
-  }
-
-  /* if we are allowed to drop frames and are falling behind (eg more
-     than x frames worth of bandwidth) */
-  if ( cpi->pb.info.dropframes_p &&
-       ( cpi->DropCount < cpi->MaxConsDroppedFrames) &&
-       ( cpi->CarryOver <
-         -((ogg_int32_t)cpi->Configuration.TargetBandwidth)) &&
-       ( cpi->DropFrameCandidate) ) {
-    /* (we didn't do this frame so we should have some left over for
-       the next frame) */
-    cpi->CarryOver += cpi->frame_target_rate;
-    DropFrame = 1;
-    cpi->DropCount ++;
-
-    /* Adjust DropFrameTriggerBytes to account for the saving achieved. */
-    cpi->DropFrameTriggerBytes =
-      (cpi->DropFrameTriggerBytes *
-       (DF_CANDIDATE_WINDOW-1))/DF_CANDIDATE_WINDOW;
-
-    /* Even if we drop a frame we should account for it when
-        considering key frame separation. */
-    cpi->LastKeyFrame++;
-  } else if ( cpi->CarryOver <
-              -((ogg_int32_t)cpi->Configuration.TargetBandwidth * 2) ) {
-    /* CarryOver is below -2*TargetBandwidth: use the floor of the
-       reduction applied in the next branch, i.e. 65% of the un-modified
-       frame bit target. */
-
-    cpi->ThisFrameTargetBytes =
-      (ogg_uint32_t)(cpi->ThisFrameTargetBytes * 0.65);
-  } else if ( cpi->CarryOver < 0 ) {
-    /* Reduce frame bit target by 1.75% for each 1/10th of a seconds
-       worth of -ve carry over.
-       Note that cpi->CarryOver is a -ve here hence 1.0 "+" ... */
-    cpi->ThisFrameTargetBytes =
-      (ogg_uint32_t)(cpi->ThisFrameTargetBytes *
-                     (1.0 + ( ((cpi->CarryOver * 10)/
-                               ((ogg_int32_t)cpi->
-                                Configuration.TargetBandwidth)) * 0.0175) ));
-  }
-
-  if ( !DropFrame ) {
-    /*  pick all the macroblock modes and motion vectors */
-    ogg_uint32_t InterError;
-    ogg_uint32_t IntraError;
-
-
-    /* Set Baseline filter level. */
-    ConfigurePP( &cpi->pp, cpi->pb.info.noise_sensitivity);
-
-    /* Score / analyses the fragments. */
-    cpi->MotionScore = YUVAnalyseFrame(&cpi->pp, &KFIndicator );
-
-    /* Get the baseline Q value */
-    RegulateQ( cpi, cpi->MotionScore );
-
-    /* Recode blocks if the error score in last frame was high. */
-    ResidueBlocksAdded  = 0;
-    for ( i = 0; i < cpi->pb.UnitFragments; i++ ){
-      if ( !cpi->pb.display_fragments[i] ){
-        if ( cpi->LastCodedErrorScore[i] >=
-             ResidueErrorThresh[cpi->pb.FrameQIndex] ) {
-          cpi->pb.display_fragments[i] = 1; /* Force block update */
-          cpi->extra_fragments[i] = 1;      /* Ensures up to date
-                                               pixel data is used. */
-          ResidueBlocksAdded ++;
-        }
-      }
-    }
-
-    /* Adjust the motion score to allow for residue blocks
-       added. These are assumed to have below average impact on
-       bitrate (Hence ResidueBlockFactor). */
-    cpi->MotionScore = cpi->MotionScore +
-      (ResidueBlocksAdded / ResidueBlockFactor[cpi->pb.FrameQIndex]);
-
-    /* Estimate the min number of blocks at best Q */
-    min_blocks_per_frame =
-      (ogg_int32_t)(cpi->ThisFrameTargetBytes /
-                    GetEstimatedBpb( cpi, VERY_BEST_Q ));
-    if ( min_blocks_per_frame == 0 )
-      min_blocks_per_frame = 1;
-
-    /* If we have less than this number then consider adding in some
-       extra blocks */
-    if ( cpi->MotionScore < min_blocks_per_frame ) {
-      min_blocks_per_frame =
-        cpi->MotionScore +
-        (ogg_int32_t)(((min_blocks_per_frame - cpi->MotionScore) * 4) / 3 );
-      UpRegulateDataStream( cpi, VERY_BEST_Q, min_blocks_per_frame );
-    }else{
-      /* Reset control variable for best quality final pass. */
-      cpi->FinalPassLastPos = 0;
-    }
-
-    /* Get the modified Q prediction taking into account extra blocks added. */
-    RegulateQ( cpi, cpi->MotionScore );
-
-    /* Unless we are already well ahead (4 seconds of data) of the
-       projected bitrate */
-    if ( cpi->CarryOver <
-         (ogg_int32_t)(cpi->Configuration.TargetBandwidth * 4) ){
-      /* Look at the predicted Q (pbi->FrameQIndex).  Adjust the
-         target bits for this frame based upon projected Q and
-         re-calculate.  The idea is that if the Q is better than a
-         given (good enough) level then we will try and save some bits
-         for use in more difficult segments. */
-      cpi->ThisFrameTargetBytes =
-        (ogg_int32_t) (cpi->ThisFrameTargetBytes *
-                       cpi->QTargetModifier[cpi->pb.FrameQIndex]);
-
-      /* Recalculate Q again */
-      RegulateQ( cpi, cpi->MotionScore );
-    }
-
-
-    /* Select modes and motion vectors for each of the blocks : return
-       an error score for inter and intra */
-    PickModes( cpi, cpi->pb.YSBRows, cpi->pb.YSBCols,
-               cpi->pb.info.width,
-               &InterError, &IntraError );
-
-    /* decide whether we really should have made this frame a key frame */
-    /* forcing out a keyframe if the max interval is up is done at a higher level */
-    /* All three tests must hold: 2*IntraError < 5*InterError, KFIndicator
-       has reached keyframe_auto_threshold, and more than
-       keyframe_mindistance frames have passed since the last key frame. */
-    if( cpi->pb.info.keyframe_auto_p){
-      if( ( 2* IntraError < 5 * InterError )
-          && ( KFIndicator >= (ogg_uint32_t)
-               cpi->pb.info.keyframe_auto_threshold)
-          && ( cpi->LastKeyFrame > cpi->pb.info.keyframe_mindistance)
-          ){
-        CompressKeyFrame(cpi);  /* Code a key frame */
-        return;
-      }
-
-    }
-
-    /* Increment the frames since last key frame count */
-    cpi->LastKeyFrame++;
-
-    /* Proceed with the frame update. */
-    UpdateFrame(cpi);
-    cpi->DropCount = 0;
-
-    if ( cpi->MotionScore > 0 ){
-      /* Note the Quantizer used for each block coded. */
-      for ( i = 0; i < cpi->pb.UnitFragments; i++ ){
-        if ( cpi->pb.display_fragments[i] ){
-          cpi->FragmentLastQ[i] = cpi->pb.ThisFrameQualityValue;
-        }
-      }
-
-    }
-  }else{
-    /* even if we 'drop' a frame, a placeholder must be written as we
-       currently assume fixed frame rate timebase as Ogg mapping
-       invariant */
-    UpdateFrame(cpi);
-  }
-}
-
-/********************** The toplevel: encode ***********************/
-
-/* Returns the number of bits needed to represent v: 0 when v is 0,
-   otherwise the 1-based position of the highest set bit. */
-static int _ilog(unsigned int v){
-  int nbits;
-  for(nbits=0;v!=0;v>>=1)nbits++;
-  return nbits;
-}
-
-static void theora_encode_dispatch_init(CP_INSTANCE *cpi);
-
-int theora_encode_init(theora_state *th, theora_info *c){
-  int i;
-
-  CP_INSTANCE *cpi;
-
-  memset(th, 0, sizeof(*th));
-  /*Currently only the 4:2:0 format is supported.*/
-  if(c->pixelformat!=OC_PF_420)return OC_IMPL;
-  th->internal_encode=cpi=_ogg_calloc(1,sizeof(*cpi));
-  theora_encode_dispatch_init(cpi);
-
-  dsp_static_init (&cpi->dsp);
-  memcpy (&cpi->pb.dsp, &cpi->dsp, sizeof(DspFunctions));
-
-  c->version_major=TH_VERSION_MAJOR;
-  c->version_minor=TH_VERSION_MINOR;
-  c->version_subminor=TH_VERSION_SUB;
-
-  InitTmpBuffers(&cpi->pb);
-  InitPPInstance(&cpi->pp, &cpi->dsp);
-
-  /* Initialise Configuration structure to legal values */
-  if(c->quality>63)c->quality=63;
-  if(c->quality<0)c->quality=32;
-  if(c->target_bitrate<0)c->target_bitrate=0;
-  /* we clamp target_bitrate to 24 bits after setting up the encoder */
-
-  cpi->Configuration.BaseQ = c->quality;
-  cpi->Configuration.FirstFrameQ = c->quality;
-  cpi->Configuration.MaxQ = c->quality;
-  cpi->Configuration.ActiveMaxQ = c->quality;
-
-  cpi->MVChangeFactor    =    14;
-  cpi->FourMvChangeFactor =   8;
-  cpi->MinImprovementForNewMV = 25;
-  cpi->ExhaustiveSearchThresh = 2500;
-  cpi->MinImprovementForFourMV = 100;
-  cpi->FourMVThreshold = 10000;
-  cpi->BitRateCapFactor = 1.5;
-  cpi->InterTripOutThresh = 5000;
-  cpi->MVEnabled = 1;
-  cpi->InterCodeCount = 127;
-  cpi->BpbCorrectionFactor = 1.0;
-  cpi->GoldenFrameEnabled = 1;
-  cpi->InterPrediction = 1;
-  cpi->MotionCompensation = 1;
-  cpi->ThreshMapThreshold = 5;
-  cpi->MaxConsDroppedFrames = 1;
-
-  /* Set encoder flags. */
-  /* if not AutoKeyframing cpi->ForceKeyFrameEvery = is frequency */
-  if(!c->keyframe_auto_p)
-    c->keyframe_frequency_force = c->keyframe_frequency;
-
-  /* Set the frame rate variables. */
-  if ( c->fps_numerator < 1 )
-    c->fps_numerator = 1;
-  if ( c->fps_denominator < 1 )
-    c->fps_denominator = 1;
-
-  /* don't go too nuts on keyframe spacing; impose a high limit to
-     make certain the granulepos encoding strategy works */
-  if(c->keyframe_frequency_force>32768)c->keyframe_frequency_force=32768;
-  if(c->keyframe_mindistance>32768)c->keyframe_mindistance=32768;
-  if(c->keyframe_mindistance>c->keyframe_frequency_force)
-    c->keyframe_mindistance=c->keyframe_frequency_force;
-  cpi->pb.keyframe_granule_shift=_ilog(c->keyframe_frequency_force-1);
-
-  /* clamp the target_bitrate to a maximum of 24 bits so we get a
-     more meaningful value when we write this out in the header. */
-  if(c->target_bitrate>(1<<24)-1)c->target_bitrate=(1<<24)-1;
-
-  /* copy in config */
-  memcpy(&cpi->pb.info,c,sizeof(*c));
-  th->i=&cpi->pb.info;
-  th->granulepos=-1;
-
-  /* Set up default values for QTargetModifier[Q_TABLE_SIZE] table */
-  for ( i = 0; i < Q_TABLE_SIZE; i++ )
-    cpi->QTargetModifier[i] = 1.0;
-
-  /* Set up an encode buffer */
-  cpi->oggbuffer = _ogg_malloc(sizeof(oggpack_buffer));
-  oggpackB_writeinit(cpi->oggbuffer);
-
-  /* Set data rate related variables. */
-  cpi->Configuration.TargetBandwidth = (c->target_bitrate) / 8;
-
-  cpi->Configuration.OutputFrameRate =
-    (double)( c->fps_numerator /
-              c->fps_denominator );
-
-  cpi->frame_target_rate = cpi->Configuration.TargetBandwidth /
-    cpi->Configuration.OutputFrameRate;
-
-  /* Set key frame data rate target; this is nominal keyframe size */
-  cpi->Configuration.KeyFrameDataTarget = (c->keyframe_data_target_bitrate *
-                                           c->fps_denominator /
-                                           c->fps_numerator ) / 8;
-
-  /* Note the height and width in the pre-processor control structure. */
-  cpi->ScanConfig.VideoFrameHeight = cpi->pb.info.height;
-  cpi->ScanConfig.VideoFrameWidth = cpi->pb.info.width;
-
-  InitFrameDetails(&cpi->pb);
-  EInitFragmentInfo(cpi);
-  EInitFrameInfo(cpi);
-
-  /* Set up pre-processor config pointers. */
-  cpi->ScanConfig.Yuv0ptr = cpi->yuv0ptr;
-  cpi->ScanConfig.Yuv1ptr = cpi->yuv1ptr;
-  cpi->ScanConfig.SrfWorkSpcPtr = cpi->ConvDestBuffer;
-  cpi->ScanConfig.disp_fragments = cpi->pb.display_fragments;
-  cpi->ScanConfig.RegionIndex = cpi->pb.pixel_index_table;
-
-  /* Initialise the pre-processor module. */
-  ScanYUVInit(&cpi->pp, &(cpi->ScanConfig));
-
-  /* Initialise Motion compensation */
-  InitMotionCompensation(cpi);
-
-  /* Initialise the compression process. */
-  /* We always start at frame 1 */
-  cpi->CurrentFrame = 1;
-
-  /* Reset the rate targeting correction factor. */
-  cpi->BpbCorrectionFactor = 1.0;
-
-  cpi->TotalByteCount = 0;
-  cpi->TotalMotionScore = 0;
-
-  /* Up regulation variables. */
-  cpi->FinalPassLastPos = 0;  /* Used to regulate a final unrestricted pass. */
-  cpi->LastEndSB = 0;         /* Where we were in the loop last time.  */
-  cpi->ResidueLastEndSB = 0;  /* Where we were in the residue update
-                                 loop last time. */
-
-  InitHuffmanSet(&cpi->pb);
-
-  /* This makes sure encoder version specific tables are initialised */
-  memcpy(&cpi->pb.quant_info, &TH_VP31_QUANT_INFO, sizeof(th_quant_info));
-  InitQTables(&cpi->pb);
-
-  /* Indicate that the next frame to be compressed is the first in the
-     current clip. */
-  cpi->ThisIsFirstFrame = 1;
-  cpi->readyflag = 1;
-
-  cpi->pb.HeadersWritten = 0;
-  /*We overload this flag to track header output.*/
-  cpi->doneflag=-3;
-
-  return 0;
-}
-
-int theora_encode_YUVin(theora_state *t,
-                         yuv_buffer *yuv){
-  ogg_int32_t i;
-  unsigned char *LocalDataPtr;
-  unsigned char *InputDataPtr;
-  CP_INSTANCE *cpi=(CP_INSTANCE *)(t->internal_encode);
-
-  if(!cpi->readyflag)return OC_EINVAL;
-  if(cpi->doneflag>0)return OC_EINVAL;
-
-  /* If frame size has changed, abort out for now */
-  if (yuv->y_height != (int)cpi->pb.info.height ||
-      yuv->y_width != (int)cpi->pb.info.width )
-    return(-1);
-
-
-  /* Copy over input YUV to internal YUV buffers. */
-  /* we invert the image for backward compatibility with VP3 */
-  /* First copy over the Y data */
-  LocalDataPtr = cpi->yuv1ptr + yuv->y_width*(yuv->y_height - 1);
-  InputDataPtr = yuv->y;
-  for ( i = 0; i < yuv->y_height; i++ ){
-    memcpy( LocalDataPtr, InputDataPtr, yuv->y_width );
-    LocalDataPtr -= yuv->y_width;
-    InputDataPtr += yuv->y_stride;
-  }
-
-  /* Now copy over the U data */
-  LocalDataPtr = &cpi->yuv1ptr[(yuv->y_height * yuv->y_width)];
-  LocalDataPtr += yuv->uv_width*(yuv->uv_height - 1);
-  InputDataPtr = yuv->u;
-  for ( i = 0; i < yuv->uv_height; i++ ){
-    memcpy( LocalDataPtr, InputDataPtr, yuv->uv_width );
-    LocalDataPtr -= yuv->uv_width;
-    InputDataPtr += yuv->uv_stride;
-  }
-
-  /* Now copy over the V data */
-  LocalDataPtr =
-    &cpi->yuv1ptr[((yuv->y_height*yuv->y_width)*5)/4];
-  LocalDataPtr += yuv->uv_width*(yuv->uv_height - 1);
-  InputDataPtr = yuv->v;
-  for ( i = 0; i < yuv->uv_height; i++ ){
-    memcpy( LocalDataPtr, InputDataPtr, yuv->uv_width );
-    LocalDataPtr -= yuv->uv_width;
-    InputDataPtr += yuv->uv_stride;
-  }
-
-  /* Special case for first frame */
-  if ( cpi->ThisIsFirstFrame ){
-    CompressFirstFrame(cpi);
-    cpi->ThisIsFirstFrame = 0;
-    cpi->ThisIsKeyFrame = 0;
-  } else {
-
-    /* don't allow generating invalid files that overflow the p-frame
-       shift, even if keyframe_auto_p is turned off */
-    if(cpi->LastKeyFrame >= (ogg_uint32_t)
-       cpi->pb.info.keyframe_frequency_force)
-      cpi->ThisIsKeyFrame = 1;
-
-    if ( cpi->ThisIsKeyFrame ) {
-      CompressKeyFrame(cpi);
-      cpi->ThisIsKeyFrame = 0;
-    } else  {
-      /* Compress the frame. */
-      CompressFrame( cpi );
-    }
-
-  }
-
-  /* Update stats variables. */
-  cpi->LastFrameSize = oggpackB_bytes(cpi->oggbuffer);
-  cpi->CurrentFrame++;
-  cpi->packetflag=1;
-
-  t->granulepos=
-    ((cpi->CurrentFrame - cpi->LastKeyFrame)<<cpi->pb.keyframe_granule_shift)+
-    cpi->LastKeyFrame - 1;
-
-  return 0;
-}
-
-int theora_encode_packetout( theora_state *t, int last_p, ogg_packet *op){
-  CP_INSTANCE *cpi=(CP_INSTANCE *)(t->internal_encode);
-  long bytes=oggpackB_bytes(cpi->oggbuffer);
-
-  if(!bytes)return(0);
-  if(!cpi->packetflag)return(0);
-  if(cpi->doneflag>0)return(-1);
-
-  op->packet=oggpackB_get_buffer(cpi->oggbuffer);
-  op->bytes=bytes;
-  op->b_o_s=0;
-  op->e_o_s=last_p;
-
-  op->packetno=cpi->CurrentFrame;
-  op->granulepos=t->granulepos;
-
-  cpi->packetflag=0;
-  if(last_p)cpi->doneflag=1;
-
-  return 1;
-}
-
-static void _tp_writebuffer(oggpack_buffer *opb, const char *buf, const long len)
-{
-  long i;
-
-  for (i = 0; i < len; i++)
-    oggpackB_write(opb, *buf++, 8);
-}
-
-static void _tp_writelsbint(oggpack_buffer *opb, long value)
-{
-  oggpackB_write(opb, value&0xFF, 8);
-  oggpackB_write(opb, value>>8&0xFF, 8);
-  oggpackB_write(opb, value>>16&0xFF, 8);
-  oggpackB_write(opb, value>>24&0xFF, 8);
-}
-
-/* build the initial short header for stream recognition and format */
-int theora_encode_header(theora_state *t, ogg_packet *op){
-  CP_INSTANCE *cpi=(CP_INSTANCE *)(t->internal_encode);
-  int offset_y;
-
-  oggpackB_reset(cpi->oggbuffer);
-  oggpackB_write(cpi->oggbuffer,0x80,8);
-  _tp_writebuffer(cpi->oggbuffer, "theora", 6);
-
-  oggpackB_write(cpi->oggbuffer,TH_VERSION_MAJOR,8);
-  oggpackB_write(cpi->oggbuffer,TH_VERSION_MINOR,8);
-  oggpackB_write(cpi->oggbuffer,TH_VERSION_SUB,8);
-
-  oggpackB_write(cpi->oggbuffer,cpi->pb.info.width>>4,16);
-  oggpackB_write(cpi->oggbuffer,cpi->pb.info.height>>4,16);
-  oggpackB_write(cpi->oggbuffer,cpi->pb.info.frame_width,24);
-  oggpackB_write(cpi->oggbuffer,cpi->pb.info.frame_height,24);
-  oggpackB_write(cpi->oggbuffer,cpi->pb.info.offset_x,8);
-  /* Applications use offset_y to mean offset from the top of the image; the
-   * meaning in the bitstream is the opposite (from the bottom). Transform.
-   */
-  offset_y = cpi->pb.info.height - cpi->pb.info.frame_height -
-    cpi->pb.info.offset_y;
-  oggpackB_write(cpi->oggbuffer,offset_y,8);
-
-  oggpackB_write(cpi->oggbuffer,cpi->pb.info.fps_numerator,32);
-  oggpackB_write(cpi->oggbuffer,cpi->pb.info.fps_denominator,32);
-  oggpackB_write(cpi->oggbuffer,cpi->pb.info.aspect_numerator,24);
-  oggpackB_write(cpi->oggbuffer,cpi->pb.info.aspect_denominator,24);
-
-  oggpackB_write(cpi->oggbuffer,cpi->pb.info.colorspace,8);
-  oggpackB_write(cpi->oggbuffer,cpi->pb.info.target_bitrate,24);
-  oggpackB_write(cpi->oggbuffer,cpi->pb.info.quality,6);
-
-  oggpackB_write(cpi->oggbuffer,cpi->pb.keyframe_granule_shift,5);
-
-  oggpackB_write(cpi->oggbuffer,cpi->pb.info.pixelformat,2);
-
-  oggpackB_write(cpi->oggbuffer,0,3); /* spare config bits */
-
-  op->packet=oggpackB_get_buffer(cpi->oggbuffer);
-  op->bytes=oggpackB_bytes(cpi->oggbuffer);
-
-  op->b_o_s=1;
-  op->e_o_s=0;
-
-  op->packetno=0;
-
-  op->granulepos=0;
-  cpi->packetflag=0;
-
-  return(0);
-}
-
-/* build the comment header packet from the passed metadata */
-int theora_encode_comment(theora_comment *tc, ogg_packet *op)
-{
-  const char *vendor = theora_version_string();
-  const int vendor_length = strlen(vendor);
-  oggpack_buffer *opb;
-
-  opb = _ogg_malloc(sizeof(oggpack_buffer));
-  oggpackB_writeinit(opb);
-  oggpackB_write(opb, 0x81, 8);
-  _tp_writebuffer(opb, "theora", 6);
-
-  _tp_writelsbint(opb, vendor_length);
-  _tp_writebuffer(opb, vendor, vendor_length);
-
-  _tp_writelsbint(opb, tc->comments);
-  if(tc->comments){
-    int i;
-    for(i=0;i<tc->comments;i++){
-      if(tc->user_comments[i]){
-        _tp_writelsbint(opb,tc->comment_lengths[i]);
-        _tp_writebuffer(opb,tc->user_comments[i],tc->comment_lengths[i]);
-      }else{
-        oggpackB_write(opb,0,32);
-      }
-    }
-  }
-  op->bytes=oggpack_bytes(opb);
-
-  /* So we're expecting the application will free this? */
-  op->packet=_ogg_malloc(oggpack_bytes(opb));
-  memcpy(op->packet, oggpack_get_buffer(opb), oggpack_bytes(opb));
-  oggpack_writeclear(opb);
-
-  _ogg_free(opb);
-
-  op->b_o_s=0;
-  op->e_o_s=0;
-
-  op->packetno=0;
-  op->granulepos=0;
-
-  return (0);
-}
-
-/* build the final header packet with the tables required
-   for decode */
-int theora_encode_tables(theora_state *t, ogg_packet *op){
-  CP_INSTANCE *cpi=(CP_INSTANCE *)(t->internal_encode);
-
-  oggpackB_reset(cpi->oggbuffer);
-  oggpackB_write(cpi->oggbuffer,0x82,8);
-  _tp_writebuffer(cpi->oggbuffer,"theora",6);
-
-  WriteQTables(&cpi->pb,cpi->oggbuffer);
-  WriteHuffmanTrees(cpi->pb.HuffRoot_VP3x,cpi->oggbuffer);
-
-  op->packet=oggpackB_get_buffer(cpi->oggbuffer);
-  op->bytes=oggpackB_bytes(cpi->oggbuffer);
-
-  op->b_o_s=0;
-  op->e_o_s=0;
-
-  op->packetno=0;
-
-  op->granulepos=0;
-  cpi->packetflag=0;
-
-  cpi->pb.HeadersWritten = 1;
-
-  return(0);
-}
-
-static void theora_encode_clear (theora_state  *th){
-  CP_INSTANCE *cpi;
-  cpi=(CP_INSTANCE *)th->internal_encode;
-  if(cpi){
-
-    ClearHuffmanSet(&cpi->pb);
-    ClearFragmentInfo(&cpi->pb);
-    ClearFrameInfo(&cpi->pb);
-    EClearFragmentInfo(cpi);
-    EClearFrameInfo(cpi);
-    ClearTmpBuffers(&cpi->pb);
-    ClearPPInstance(&cpi->pp);
-
-    oggpackB_writeclear(cpi->oggbuffer);
-    _ogg_free(cpi->oggbuffer);
-    _ogg_free(cpi);
-  }
-
-  memset(th,0,sizeof(*th));
-}
-
-
-/* returns, in seconds, absolute time of current packet in given
-   logical stream */
-static double theora_encode_granule_time(theora_state *th,
- ogg_int64_t granulepos){
-#ifndef THEORA_DISABLE_FLOAT
-  CP_INSTANCE *cpi=(CP_INSTANCE *)(th->internal_encode);
-  PB_INSTANCE *pbi=(PB_INSTANCE *)(th->internal_decode);
-
-  if(cpi)pbi=&cpi->pb;
-
-  if(granulepos>=0){
-    ogg_int64_t iframe=granulepos>>pbi->keyframe_granule_shift;
-    ogg_int64_t pframe=granulepos-(iframe<<pbi->keyframe_granule_shift);
-
-    return (iframe+pframe)*
-      ((double)pbi->info.fps_denominator/pbi->info.fps_numerator);
-
-  }
-#endif
-
-  return(-1); /* negative granulepos or float calculations disabled */
-}
-
-/* returns frame number of current packet in given logical stream */
-static ogg_int64_t theora_encode_granule_frame(theora_state *th,
- ogg_int64_t granulepos){
-  CP_INSTANCE *cpi=(CP_INSTANCE *)(th->internal_encode);
-  PB_INSTANCE *pbi=(PB_INSTANCE *)(th->internal_decode);
-
-  if(cpi)pbi=&cpi->pb;
-
-  if(granulepos>=0){
-    ogg_int64_t iframe=granulepos>>pbi->keyframe_granule_shift;
-    ogg_int64_t pframe=granulepos-(iframe<<pbi->keyframe_granule_shift);
-
-    return (iframe+pframe-1);
-  }
-
-  return(-1);
-}
-
-
-static int theora_encode_control(theora_state *th,int req,
- void *buf,size_t buf_sz) {
-  CP_INSTANCE *cpi;
-  PB_INSTANCE *pbi;
-  int value;
-
-  if(th == NULL)
-    return TH_EFAULT;
-
-  cpi = th->internal_encode;
-  pbi = &cpi->pb;
-
-  switch(req) {
-    case TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE:
-      {
-	ogg_uint32_t keyframe_frequency_force;
-	if( (buf==NULL) || (buf_sz!=sizeof(ogg_uint32_t))) return TH_EINVAL;
-	keyframe_frequency_force=*(ogg_uint32_t *)buf;
-	
-	keyframe_frequency_force=
-	  OC_MINI(keyframe_frequency_force,
-		  1U<<cpi->pb.keyframe_granule_shift);
-	cpi->pb.info.keyframe_frequency_force=
-	  OC_MAXI(1,keyframe_frequency_force);
-	*(ogg_uint32_t *)buf=cpi->pb.info.keyframe_frequency_force;
-	return 0;
-      }
-    case TH_ENCCTL_SET_QUANT_PARAMS:
-      if( ( buf==NULL&&buf_sz!=0 )
-           || ( buf!=NULL&&buf_sz!=sizeof(th_quant_info) )
-           || cpi->pb.HeadersWritten ){
-        return TH_EINVAL;
-      }
-
-      if(buf==NULL)
-	memcpy(&pbi->quant_info, &TH_VP31_QUANT_INFO, sizeof(th_quant_info));
-      else
-	memcpy(&pbi->quant_info, buf, sizeof(th_quant_info));
-      InitQTables(pbi);
-
-      return 0;
-    case TH_ENCCTL_SET_VP3_COMPATIBLE:
-      if(cpi->pb.HeadersWritten)
-        return TH_EINVAL;
-
-      memcpy(&pbi->quant_info, &TH_VP31_QUANT_INFO, sizeof(th_quant_info));
-      InitQTables(pbi);
-
-      return 0;
-    case TH_ENCCTL_SET_SPLEVEL:
-      if(buf == NULL || buf_sz != sizeof(int))
-        return TH_EINVAL;
-
-      memcpy(&value, buf, sizeof(int));
-
-      switch(value) {
-        case 0:
-          cpi->MotionCompensation = 1;
-          pbi->info.quick_p = 0;
-        break;
-
-        case 1:
-          cpi->MotionCompensation = 1;
-          pbi->info.quick_p = 1;
-        break;
-
-        case 2:
-          cpi->MotionCompensation = 0;
-          pbi->info.quick_p = 1;
-        break;
-
-        default:
-          return TH_EINVAL;
-      }
-
-      return 0;
-    case TH_ENCCTL_GET_SPLEVEL_MAX:
-      value = 2;
-      memcpy(buf, &value, sizeof(int));
-      return 0;
-    default:
-      return TH_EIMPL;
-  }
-}
-
-static void theora_encode_dispatch_init(CP_INSTANCE *cpi){
-  cpi->dispatch_vtbl.clear=theora_encode_clear;
-  cpi->dispatch_vtbl.control=theora_encode_control;
-  cpi->dispatch_vtbl.granule_frame=theora_encode_granule_frame;
-  cpi->dispatch_vtbl.granule_time=theora_encode_granule_time;
-}

+ 0 - 243
Engine/lib/libtheora/lib/enc/frarray.c

@@ -1,243 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: frarray.c 15153 2008-08-04 18:37:55Z tterribe $
-
- ********************************************************************/
-
-#include <string.h>
-#include "codec_internal.h"
-#include "block_inline.h"
-
-/* Long run bit string coding */
-static ogg_uint32_t FrArrayCodeSBRun( CP_INSTANCE *cpi, ogg_uint32_t value){
-  ogg_uint32_t CodedVal = 0;
-  ogg_uint32_t CodedBits = 0;
-
-  /* Coding scheme:
-        Codeword              RunLength
-      0                       1
-      10x                     2-3
-      110x                    4-5
-      1110xx                  6-9
-      11110xxx                10-17
-      111110xxxx              18-33
-      111111xxxxxxxxxxxx      34-4129 */
-
-  if ( value == 1 ){
-    CodedVal = 0;
-    CodedBits = 1;
-  } else if ( value <= 3 ) {
-    CodedVal = 0x0004 + (value - 2);
-    CodedBits = 3;
-  } else if ( value <= 5 ) {
-    CodedVal = 0x000C + (value - 4);
-    CodedBits = 4;
-  } else if ( value <= 9 ) {
-    CodedVal = 0x0038 + (value - 6);
-    CodedBits = 6;
-  } else if ( value <= 17 ) {
-    CodedVal = 0x00F0 + (value - 10);
-    CodedBits = 8;
-  } else if ( value <= 33 ) {
-    CodedVal = 0x03E0 + (value - 18);
-    CodedBits = 10;
-  } else {
-    CodedVal = 0x3F000 + (value - 34);
-    CodedBits = 18;
-  }
-
-  /* Add the bits to the encode holding buffer. */
-  oggpackB_write( cpi->oggbuffer, CodedVal, CodedBits );
-
-  return CodedBits;
-}
-
-/* Short run bit string coding */
-static ogg_uint32_t FrArrayCodeBlockRun( CP_INSTANCE *cpi,
-                                         ogg_uint32_t value ) {
-  ogg_uint32_t CodedVal = 0;
-  ogg_uint32_t CodedBits = 0;
-
-  /* Coding scheme:
-        Codeword                                RunLength
-        0x                                      1-2
-        10x                                     3-4
-        110x                                    5-6
-        1110xx                                  7-10
-        11110xx                                 11-14
-        11111xxxx                               15-30 */
-
-  if ( value <= 2 ) {
-    CodedVal = value - 1;
-    CodedBits = 2;
-  } else if ( value <= 4 ) {
-    CodedVal = 0x0004 + (value - 3);
-    CodedBits = 3;
-
-  } else if ( value <= 6 ) {
-    CodedVal = 0x000C + (value - 5);
-    CodedBits = 4;
-
-  } else if ( value <= 10 ) {
-    CodedVal = 0x0038 + (value - 7);
-    CodedBits = 6;
-
-  } else if ( value <= 14 ) {
-    CodedVal = 0x0078 + (value - 11);
-    CodedBits = 7;
-  } else {
-    CodedVal = 0x01F0 + (value - 15);
-    CodedBits = 9;
- }
-
-  /* Add the bits to the encode holding buffer. */
-  oggpackB_write( cpi->oggbuffer, CodedVal, CodedBits );
-
-  return CodedBits;
-}
-
-void PackAndWriteDFArray( CP_INSTANCE *cpi ){
-  ogg_uint32_t  i;
-  unsigned char val;
-  ogg_uint32_t  run_count;
-
-  ogg_uint32_t  SB, MB, B;   /* Block, MB and SB loop variables */
-  ogg_uint32_t  BListIndex = 0;
-  ogg_uint32_t  LastSbBIndex = 0;
-  ogg_int32_t   DfBlockIndex;  /* Block index in display_fragments */
-
-  /* Initialise workspaces */
-  memset( cpi->pb.SBFullyFlags, 1, cpi->pb.SuperBlocks);
-  memset( cpi->pb.SBCodedFlags, 0, cpi->pb.SuperBlocks );
-  memset( cpi->PartiallyCodedFlags, 0, cpi->pb.SuperBlocks );
-  memset( cpi->BlockCodedFlags, 0, cpi->pb.UnitFragments);
-
-  for( SB = 0; SB < cpi->pb.SuperBlocks; SB++ ) {
-    /* Check for coded blocks and macro-blocks */
-    for ( MB=0; MB<4; MB++ ) {
-      /* If MB in frame */
-      if ( QuadMapToMBTopLeft(cpi->pb.BlockMap,SB,MB) >= 0 ) {
-        for ( B=0; B<4; B++ ) {
-          DfBlockIndex = QuadMapToIndex1( cpi->pb.BlockMap,SB, MB, B );
-
-          /* Does Block lie in frame: */
-          if ( DfBlockIndex >= 0 ) {
-            /* In Frame: If it is not coded then this SB is only
-               partly coded.: */
-            if ( cpi->pb.display_fragments[DfBlockIndex] ) {
-              cpi->pb.SBCodedFlags[SB] = 1; /* SB at least partly coded */
-              cpi->BlockCodedFlags[BListIndex] = 1; /* Block is coded */
-
-            }else{
-              cpi->pb.SBFullyFlags[SB] = 0; /* SB not fully coded */
-              cpi->BlockCodedFlags[BListIndex] = 0; /* Block is not coded */
-            }
-
-            BListIndex++;
-          }
-        }
-      }
-    }
-
-    /* Is the SB fully coded or uncoded.
-       If so then backup BListIndex and MBListIndex */
-    if ( cpi->pb.SBFullyFlags[SB] || !cpi->pb.SBCodedFlags[SB] ) {
-      BListIndex = LastSbBIndex; /* Reset to values from previous SB */
-    }else{
-      cpi->PartiallyCodedFlags[SB] = 1; /* Set up list of partially
-                                           coded SBs */
-      LastSbBIndex = BListIndex;
-    }
-  }
-
-  /* Code list of partially coded Super-Block.  */
-  val = cpi->PartiallyCodedFlags[0];
-  oggpackB_write( cpi->oggbuffer, (ogg_uint32_t)val, 1);
-
-  i = 0;
-  while ( i < cpi->pb.SuperBlocks ) {
-    run_count = 0;
-    while ( (i<cpi->pb.SuperBlocks) &&
-            (cpi->PartiallyCodedFlags[i]==val) &&
-            run_count<4129 ) {
-      i++;
-      run_count++;
-    }
-
-    /* Code the run */
-    FrArrayCodeSBRun( cpi, run_count);
-
-    if(run_count >= 4129 && i < cpi->pb.SuperBlocks ){
-      val = cpi->PartiallyCodedFlags[i];
-      oggpackB_write( cpi->oggbuffer, (ogg_uint32_t)val, 1);
-
-    }else
-      val = ( val == 0 ) ? 1 : 0;
-  }
-
-  /* RLC Super-Block fully/not coded. */
-  i = 0;
-
-  /* Skip partially coded blocks */
-  while( (i < cpi->pb.SuperBlocks) && cpi->PartiallyCodedFlags[i] )
-    i++;
-
-  if ( i < cpi->pb.SuperBlocks ) {
-    val = cpi->pb.SBFullyFlags[i];
-    oggpackB_write( cpi->oggbuffer, (ogg_uint32_t)val, 1);
-
-    while ( i < cpi->pb.SuperBlocks ) {
-      run_count = 0;
-      while ( (i < cpi->pb.SuperBlocks) &&
-              (cpi->pb.SBFullyFlags[i] == val) &&
-              run_count < 4129) {
-        i++;
-        /* Skip partially coded blocks */
-        while( (i < cpi->pb.SuperBlocks) && cpi->PartiallyCodedFlags[i] )
-          i++;
-        run_count++;
-      }
-
-      /* Code the run */
-      FrArrayCodeSBRun( cpi, run_count );
-
-    if(run_count >= 4129 && i < cpi->pb.SuperBlocks ){
-      val = cpi->PartiallyCodedFlags[i];
-      oggpackB_write( cpi->oggbuffer, (ogg_uint32_t)val, 1);
-    }else
-      val = ( val == 0 ) ? 1 : 0;
-    }
-  }
-
-
-  /*  Now code the block flags */
-  if ( BListIndex > 0 ) {
-    /* Code the block flags start value */
-    val = cpi->BlockCodedFlags[0];
-    oggpackB_write( cpi->oggbuffer, (ogg_uint32_t)val, 1);
-
-    /* Now code the block flags. */
-    for ( i = 0; i < BListIndex; ) {
-      run_count = 0;
-      while ( (i < BListIndex) && (cpi->BlockCodedFlags[i] == val) ) {
-        i++;
-        run_count++;
-      }
-
-      FrArrayCodeBlockRun( cpi, run_count );
-
-      val = ( val == 0 ) ? 1 : 0;
-    }
-  }
-}

+ 0 - 392
Engine/lib/libtheora/lib/enc/frinit.c

@@ -1,392 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: frinit.c 15153 2008-08-04 18:37:55Z tterribe $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include "codec_internal.h"
-
-
-void InitializeFragCoordinates(PB_INSTANCE *pbi){
-
-  ogg_uint32_t i, j;
-
-  ogg_uint32_t HorizFrags = pbi->HFragments;
-  ogg_uint32_t VertFrags = pbi->VFragments;
-  ogg_uint32_t StartFrag = 0;
-
-  /* Y */
-
-  for(i = 0; i< VertFrags; i++){
-    for(j = 0; j< HorizFrags; j++){
-
-      ogg_uint32_t ThisFrag = i * HorizFrags + j;
-      pbi->FragCoordinates[ ThisFrag ].x=j * BLOCK_HEIGHT_WIDTH;
-      pbi->FragCoordinates[ ThisFrag ].y=i * BLOCK_HEIGHT_WIDTH;
-
-    }
-  }
-
-  /* U */
-  HorizFrags >>= 1;
-  VertFrags >>= 1;
-  StartFrag = pbi->YPlaneFragments;
-
-  for(i = 0; i< VertFrags; i++) {
-    for(j = 0; j< HorizFrags; j++) {
-      ogg_uint32_t ThisFrag = StartFrag + i * HorizFrags + j;
-      pbi->FragCoordinates[ ThisFrag ].x=j * BLOCK_HEIGHT_WIDTH;
-      pbi->FragCoordinates[ ThisFrag ].y=i * BLOCK_HEIGHT_WIDTH;
-
-    }
-  }
-
-  /* V */
-  StartFrag = pbi->YPlaneFragments + pbi->UVPlaneFragments;
-  for(i = 0; i< VertFrags; i++) {
-    for(j = 0; j< HorizFrags; j++) {
-      ogg_uint32_t ThisFrag = StartFrag + i * HorizFrags + j;
-      pbi->FragCoordinates[ ThisFrag ].x=j * BLOCK_HEIGHT_WIDTH;
-      pbi->FragCoordinates[ ThisFrag ].y=i * BLOCK_HEIGHT_WIDTH;
-
-    }
-  }
-}
-
-static void CalcPixelIndexTable( PB_INSTANCE *pbi){
-  ogg_uint32_t i;
-  ogg_uint32_t * PixelIndexTablePtr;
-
-  /* Calculate the pixel index table for normal image buffers */
-  PixelIndexTablePtr = pbi->pixel_index_table;
-  for ( i = 0; i < pbi->YPlaneFragments; i++ ) {
-    PixelIndexTablePtr[ i ] =
-      ((i / pbi->HFragments) * VFRAGPIXELS *
-       pbi->info.width);
-    PixelIndexTablePtr[ i ] +=
-      ((i % pbi->HFragments) * HFRAGPIXELS);
-  }
-
-  PixelIndexTablePtr = &pbi->pixel_index_table[pbi->YPlaneFragments];
-  for ( i = 0; i < ((pbi->HFragments >> 1) * pbi->VFragments); i++ ) {
-    PixelIndexTablePtr[ i ] =
-      ((i / (pbi->HFragments / 2) ) *
-       (VFRAGPIXELS *
-        (pbi->info.width / 2)) );
-    PixelIndexTablePtr[ i ] +=
-      ((i % (pbi->HFragments / 2) ) *
-       HFRAGPIXELS) + pbi->YPlaneSize;
-  }
-
-  /************************************************************************/
-  /* Now calculate the pixel index table for image reconstruction buffers */
-  PixelIndexTablePtr = pbi->recon_pixel_index_table;
-  for ( i = 0; i < pbi->YPlaneFragments; i++ ){
-    PixelIndexTablePtr[ i ] =
-      ((i / pbi->HFragments) * VFRAGPIXELS *
-       pbi->YStride);
-    PixelIndexTablePtr[ i ] +=
-      ((i % pbi->HFragments) * HFRAGPIXELS) +
-      pbi->ReconYDataOffset;
-  }
-
-  /* U blocks */
-  PixelIndexTablePtr = &pbi->recon_pixel_index_table[pbi->YPlaneFragments];
-  for ( i = 0; i < pbi->UVPlaneFragments; i++ ) {
-    PixelIndexTablePtr[ i ] =
-      ((i / (pbi->HFragments / 2) ) *
-       (VFRAGPIXELS * (pbi->UVStride)) );
-    PixelIndexTablePtr[ i ] +=
-      ((i % (pbi->HFragments / 2) ) *
-       HFRAGPIXELS) + pbi->ReconUDataOffset;
-  }
-
-  /* V blocks */
-  PixelIndexTablePtr =
-    &pbi->recon_pixel_index_table[pbi->YPlaneFragments +
-                                 pbi->UVPlaneFragments];
-
-  for ( i = 0; i < pbi->UVPlaneFragments; i++ ) {
-    PixelIndexTablePtr[ i ] =
-      ((i / (pbi->HFragments / 2) ) *
-       (VFRAGPIXELS * (pbi->UVStride)) );
-    PixelIndexTablePtr[ i ] +=
-      ((i % (pbi->HFragments / 2) ) * HFRAGPIXELS) +
-      pbi->ReconVDataOffset;
-  }
-}
-
-void ClearFragmentInfo(PB_INSTANCE * pbi){
-
-  /* free prior allocs if present */
-  if(pbi->display_fragments) _ogg_free(pbi->display_fragments);
-  if(pbi->pixel_index_table) _ogg_free(pbi->pixel_index_table);
-  if(pbi->recon_pixel_index_table) _ogg_free(pbi->recon_pixel_index_table);
-  if(pbi->FragTokenCounts) _ogg_free(pbi->FragTokenCounts);
-  if(pbi->CodedBlockList) _ogg_free(pbi->CodedBlockList);
-  if(pbi->FragMVect) _ogg_free(pbi->FragMVect);
-  if(pbi->FragCoeffs) _ogg_free(pbi->FragCoeffs);
-  if(pbi->FragCoefEOB) _ogg_free(pbi->FragCoefEOB);
-  if(pbi->skipped_display_fragments) _ogg_free(pbi->skipped_display_fragments);
-  if(pbi->QFragData) _ogg_free(pbi->QFragData);
-  if(pbi->TokenList) _ogg_free(pbi->TokenList);
-  if(pbi->FragCodingMethod) _ogg_free(pbi->FragCodingMethod);
-  if(pbi->FragCoordinates) _ogg_free(pbi->FragCoordinates);
-
-  if(pbi->FragQIndex) _ogg_free(pbi->FragQIndex);
-  if(pbi->PPCoefBuffer) _ogg_free(pbi->PPCoefBuffer);
-  if(pbi->FragmentVariances) _ogg_free(pbi->FragmentVariances);
-
-  if(pbi->BlockMap) _ogg_free(pbi->BlockMap);
-
-  if(pbi->SBCodedFlags) _ogg_free(pbi->SBCodedFlags);
-  if(pbi->SBFullyFlags) _ogg_free(pbi->SBFullyFlags);
-  if(pbi->MBFullyFlags) _ogg_free(pbi->MBFullyFlags);
-  if(pbi->MBCodedFlags) _ogg_free(pbi->MBCodedFlags);
-
-  if(pbi->_Nodes) _ogg_free(pbi->_Nodes);
-  pbi->_Nodes = 0;
-
-  pbi->QFragData = 0;
-  pbi->TokenList = 0;
-  pbi->skipped_display_fragments = 0;
-  pbi->FragCoeffs = 0;
-  pbi->FragCoefEOB = 0;
-  pbi->display_fragments = 0;
-  pbi->pixel_index_table = 0;
-  pbi->recon_pixel_index_table = 0;
-  pbi->FragTokenCounts = 0;
-  pbi->CodedBlockList = 0;
-  pbi->FragCodingMethod = 0;
-  pbi->FragMVect = 0;
-  pbi->MBCodedFlags = 0;
-  pbi->MBFullyFlags = 0;
-  pbi->BlockMap = 0;
-
-  pbi->SBCodedFlags = 0;
-  pbi->SBFullyFlags = 0;
-  pbi->QFragData = 0;
-  pbi->TokenList = 0;
-  pbi->skipped_display_fragments = 0;
-  pbi->FragCoeffs = 0;
-  pbi->FragCoefEOB = 0;
-  pbi->display_fragments = 0;
-  pbi->pixel_index_table = 0;
-  pbi->recon_pixel_index_table = 0;
-  pbi->FragTokenCounts = 0;
-  pbi->CodedBlockList = 0;
-  pbi->FragCodingMethod = 0;
-  pbi->FragCoordinates = 0;
-  pbi->FragMVect = 0;
-
-  pbi->PPCoefBuffer=0;
-  pbi->PPCoefBuffer=0;
-  pbi->FragQIndex = 0;
-  pbi->FragQIndex = 0;
-  pbi->FragmentVariances= 0;
-  pbi->FragmentVariances = 0 ;
-}
-
-void InitFragmentInfo(PB_INSTANCE * pbi){
-
-  /* clear any existing info */
-  ClearFragmentInfo(pbi);
-
-  /* Perform Fragment Allocations */
-  pbi->display_fragments =
-    _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->display_fragments));
-
-  pbi->pixel_index_table =
-    _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->pixel_index_table));
-
-  pbi->recon_pixel_index_table =
-    _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->recon_pixel_index_table));
-
-  pbi->FragTokenCounts =
-    _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragTokenCounts));
-
-  pbi->CodedBlockList =
-    _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->CodedBlockList));
-
-  pbi->FragMVect =
-    _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragMVect));
-
-  pbi->FragCoeffs =
-    _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragCoeffs));
-
-  pbi->FragCoefEOB =
-    _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragCoefEOB));
-
-  pbi->skipped_display_fragments =
-    _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->skipped_display_fragments));
-
-  pbi->QFragData =
-    _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->QFragData));
-
-  pbi->TokenList =
-    _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->TokenList));
-
-  pbi->FragCodingMethod =
-    _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragCodingMethod));
-
-  pbi->FragCoordinates =
-    _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragCoordinates));
-
-  pbi->FragQIndex =
-    _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragQIndex));
-
-  pbi->PPCoefBuffer =
-    _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->PPCoefBuffer));
-
-  pbi->FragmentVariances =
-    _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->FragmentVariances));
-
-  pbi->_Nodes =
-    _ogg_malloc(pbi->UnitFragments * sizeof(*pbi->_Nodes));
-
-  /* Super Block Initialization */
-  pbi->SBCodedFlags =
-    _ogg_malloc(pbi->SuperBlocks * sizeof(*pbi->SBCodedFlags));
-
-  pbi->SBFullyFlags =
-    _ogg_malloc(pbi->SuperBlocks * sizeof(*pbi->SBFullyFlags));
-
-  /* Macro Block Initialization */
-  pbi->MBCodedFlags =
-    _ogg_malloc(pbi->MacroBlocks * sizeof(*pbi->MBCodedFlags));
-
-  pbi->MBFullyFlags =
-    _ogg_malloc(pbi->MacroBlocks * sizeof(*pbi->MBFullyFlags));
-
-  pbi->BlockMap =
-    _ogg_malloc(pbi->SuperBlocks * sizeof(*pbi->BlockMap));
-
-}
-
-void ClearFrameInfo(PB_INSTANCE * pbi){
-  if(pbi->ThisFrameRecon )
-    _ogg_free(pbi->ThisFrameRecon );
-  if(pbi->GoldenFrame)
-    _ogg_free(pbi->GoldenFrame);
-  if(pbi->LastFrameRecon)
-    _ogg_free(pbi->LastFrameRecon);
-  if(pbi->PostProcessBuffer)
-    _ogg_free(pbi->PostProcessBuffer);
-
-
-  pbi->ThisFrameRecon = 0;
-  pbi->GoldenFrame = 0;
-  pbi->LastFrameRecon = 0;
-  pbi->PostProcessBuffer = 0;
-
-
-  pbi->ThisFrameRecon = 0;
-  pbi->GoldenFrame = 0;
-  pbi->LastFrameRecon = 0;
-  pbi->PostProcessBuffer = 0;
-
-}
-
-void InitFrameInfo(PB_INSTANCE * pbi, unsigned int FrameSize){
-
-  /* clear any existing info */
-  ClearFrameInfo(pbi);
-
-  /* allocate frames */
-  pbi->ThisFrameRecon =
-    _ogg_malloc(FrameSize*sizeof(*pbi->ThisFrameRecon));
-
-  pbi->GoldenFrame =
-    _ogg_malloc(FrameSize*sizeof(*pbi->GoldenFrame));
-
-  pbi->LastFrameRecon =
-    _ogg_malloc(FrameSize*sizeof(*pbi->LastFrameRecon));
-
-  pbi->PostProcessBuffer =
-    _ogg_malloc(FrameSize*sizeof(*pbi->PostProcessBuffer));
-
-}
-
-void InitFrameDetails(PB_INSTANCE *pbi){
-  int FrameSize;
-
-  /*pbi->PostProcessingLevel = 0;
-    pbi->PostProcessingLevel = 4;
-    pbi->PostProcessingLevel = 5;
-    pbi->PostProcessingLevel = 6;*/
-
-  pbi->PostProcessingLevel = 0;
-
-
-    /* Set the frame size etc. */
-
-  pbi->YPlaneSize = pbi->info.width *
-    pbi->info.height;
-  pbi->UVPlaneSize = pbi->YPlaneSize / 4;
-  pbi->HFragments = pbi->info.width / HFRAGPIXELS;
-  pbi->VFragments = pbi->info.height / VFRAGPIXELS;
-  pbi->UnitFragments = ((pbi->VFragments * pbi->HFragments)*3)/2;
-  pbi->YPlaneFragments = pbi->HFragments * pbi->VFragments;
-  pbi->UVPlaneFragments = pbi->YPlaneFragments / 4;
-
-  pbi->YStride = (pbi->info.width + STRIDE_EXTRA);
-  pbi->UVStride = pbi->YStride / 2;
-  pbi->ReconYPlaneSize = pbi->YStride *
-    (pbi->info.height + STRIDE_EXTRA);
-  pbi->ReconUVPlaneSize = pbi->ReconYPlaneSize / 4;
-  FrameSize = pbi->ReconYPlaneSize + 2 * pbi->ReconUVPlaneSize;
-
-  pbi->YDataOffset = 0;
-  pbi->UDataOffset = pbi->YPlaneSize;
-  pbi->VDataOffset = pbi->YPlaneSize + pbi->UVPlaneSize;
-  pbi->ReconYDataOffset =
-    (pbi->YStride * UMV_BORDER) + UMV_BORDER;
-  pbi->ReconUDataOffset = pbi->ReconYPlaneSize +
-    (pbi->UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2);
-  pbi->ReconVDataOffset = pbi->ReconYPlaneSize + pbi->ReconUVPlaneSize +
-    (pbi->UVStride * (UMV_BORDER/2)) + (UMV_BORDER/2);
-
-  /* Image dimensions in Super-Blocks */
-  pbi->YSBRows = (pbi->info.height/32)  +
-    ( pbi->info.height%32 ? 1 : 0 );
-  pbi->YSBCols = (pbi->info.width/32)  +
-    ( pbi->info.width%32 ? 1 : 0 );
-  pbi->UVSBRows = ((pbi->info.height/2)/32)  +
-    ( (pbi->info.height/2)%32 ? 1 : 0 );
-  pbi->UVSBCols = ((pbi->info.width/2)/32)  +
-    ( (pbi->info.width/2)%32 ? 1 : 0 );
-
-  /* Super-Blocks per component */
-  pbi->YSuperBlocks = pbi->YSBRows * pbi->YSBCols;
-  pbi->UVSuperBlocks = pbi->UVSBRows * pbi->UVSBCols;
-  pbi->SuperBlocks = pbi->YSuperBlocks+2*pbi->UVSuperBlocks;
-
-  /* Useful externals */
-  pbi->MacroBlocks = ((pbi->VFragments+1)/2)*((pbi->HFragments+1)/2);
-
-  InitFragmentInfo(pbi);
-  InitFrameInfo(pbi, FrameSize);
-  InitializeFragCoordinates(pbi);
-
-  /* Configure mapping between quad-tree and fragments */
-  CreateBlockMapping ( pbi->BlockMap, pbi->YSuperBlocks,
-                       pbi->UVSuperBlocks, pbi->HFragments, pbi->VFragments);
-
-  /* Re-initialise the pixel index table. */
-
-  CalcPixelIndexTable( pbi );
-
-}
-

+ 0 - 1034
Engine/lib/libtheora/lib/enc/hufftables.h

@@ -1,1034 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: hufftables.h 13884 2007-09-22 08:38:10Z giles $
-
- ********************************************************************/
-
-#include "../dec/huffman.h"
-#include "codec_internal.h"
-
-const unsigned char ExtraBitLengths_VP31[MAX_ENTROPY_TOKENS] = {
-  0, 0, 0, 2, 3, 4, 12,3, 6,                 /* EOB and Zero-run tokens. */
-  0, 0, 0, 0,                                /* Very low value tokens. */
-  1, 1, 1, 1, 2, 3, 4, 5, 6, 10,             /* Other value tokens */
-  1, 1, 1, 1, 1, 3, 4,                       /* Category 1 runs. */
-  2, 3,                                      /* Category 2 runs. */
-};
-
-#define NEW_FREQS 0 /* dbm - test new frequency tables */
-
-#if NEW_FREQS
-/* New baseline frequency tables for encoder version >= 2 */
-const ogg_uint32_t FrequencyCounts_VP3[NUM_HUFF_TABLES][MAX_ENTROPY_TOKENS] = {
-  /* DC Intra bias  */
-  {  272,    84,    31,    36,    10,    2,    1,    92,    1,
-     701,   872,   410,   478,
-     630,   502,   417,   356,   582,   824,   985,   965,   697,   606,
-     125,   119,    40,    3,    9,    15,    10,
-     73,    37,
-  },
-  {  311,   107,    41,    51,    18,    4,    2,   120,    1,
-     824,  1037,   468,   541,
-     714,   555,   451,   374,   595,   819,   929,   817,   474,   220,
-     172,   142,    27,    4,    9,    10,    2,
-     98,    48,
-  },
-  {  353,   125,    49,    66,    24,    6,    2,   124,    1,
-     926,  1172,   512,   594,
-     766,   581,   458,   379,   590,   789,   849,   665,   306,    80,
-     204,   147,    25,    5,    12,    9,    2,
-     108,    54,
-  },
-  {  392,   141,    57,    75,    31,    7,    4,   138,    1,
-     1050,  1321,   559,   649,
-     806,   594,   460,   372,   568,   727,   710,   475,   155,    19,
-     251,   174,    27,    7,    16,    8,    2,
-     126,    62,
-  },
-  {  455,   168,    66,    87,    39,    10,    6,   124,    2,
-     1143,  1455,   592,   692,
-     824,   596,   453,   361,   542,   657,   592,   329,    78,    5,
-     269,   184,    27,    9,    19,    7,    2,
-     127,    66,
-  },
-  {  544,   201,    80,   102,    45,    11,    6,    99,    1,
-     1236,  1587,   610,   720,
-     833,   590,   444,   348,   506,   588,   487,   226,    39,    2,
-     253,   178,    27,    10,    20,    7,    2,
-     118,    65,
-  },
-  {  649,   241,    98,   121,    54,    14,    8,    84,    1,
-     1349,  1719,   634,   763,
-     847,   583,   428,   323,   456,   492,   349,   120,    13,    1,
-     231,   170,    24,    8,    19,    7,    1,
-     109,    67,
-  },
-  {  824,   304,   129,   158,    66,    19,    10,    44,    2,
-     1476,  1925,   644,   794,
-     838,   559,   396,   289,   392,   384,   223,    53,    3,    1,
-     159,   121,    17,    6,    16,    6,    2,
-     69,    53,
-  },
-
-  /* DC Inter Bias */
-  {  534,   174,    71,    68,    10,    1,    1,    68,   119,
-     1674,  1526,   560,   536,
-     539,   331,   229,   168,   233,   262,   231,   149,    71,    51,
-     629,   530,   284,   126,   182,   208,   184,
-     148,    87,
-  },
-  {  594,   195,    77,    71,    9,    1,    1,    47,    89,
-     1723,  1592,   595,   570,
-     574,   351,   241,   176,   243,   271,   234,   144,    65,    37,
-     534,   449,   240,   117,   167,   277,   153,
-     96,    54,
-  },
-  {  642,   213,    88,    83,    12,    1,    1,    40,    80,
-     1751,  1630,   621,   600,
-     598,   367,   250,   183,   251,   276,   235,   143,    62,    28,
-     485,   397,   212,   110,   161,   193,   141,
-     84,    48,
-  },
-  {  693,   258,   114,   131,    27,    3,    1,    44,    79,
-     1794,  1644,   550,   533,
-     518,   314,   213,   154,   209,   223,   174,    97,    40,    14,
-     584,   463,   236,   138,   196,   249,   143,
-     94,    54,
-  },
-  {  758,   303,   144,   189,    53,    8,    1,    37,    69,
-     1842,  1732,   513,   504,
-     478,   287,   191,   137,   182,   186,   137,    72,    31,    6,
-     589,   469,   199,   128,   177,   264,   161,
-     89,    49,
-  },
-  {  817,   344,   170,   243,    84,    18,    2,    30,    65,
-     1836,  1733,   518,   511,
-     477,   281,   185,   130,   169,   166,   117,    59,    25,    3,
-     572,   450,   185,   121,   173,   232,   146,
-     80,    43,
-  },
-  {  865,   389,   204,   322,   139,    42,    9,    26,    51,
-     1848,  1766,   531,   522,
-     477,   275,   177,   122,   153,   144,    97,    50,    16,    1,
-     485,   378,   167,   115,   164,   203,   128,
-     74,    42,
-  },
-  {  961,   447,   243,   407,   196,    74,    26,    12,    34,
-     2003,  1942,   571,   565,
-     494,   278,   173,   116,   141,   129,    85,    44,    8,    1,
-     285,   223,   101,    66,   104,   120,    74,
-     35,    22,
-  },
-
-  /* AC INTRA Tables  */
-  /* AC Intra bias group 1 tables */
-  {  245,    68,    25,    28,    5,    1,    1,   359,    4,
-     910,   904,   570,   571,
-     766,   620,   478,   375,   554,   684,   652,   441,   182,    30,
-     535,   206,   118,    77,    69,    90,    16,
-     299,   100,
-  },
-  {  302,    86,    32,    36,    8,    1,    1,   362,    3,
-     974,   968,   599,   599,
-     774,   635,   469,   365,   528,   628,   557,   337,   118,    14,
-     577,   219,   136,    82,    69,    65,    13,
-     317,   112,
-  },
-  {  348,   102,    39,    44,    9,    2,    1,   363,    3,
-     1062,  1055,   607,   609,
-     787,   626,   457,   348,   494,   550,   452,   233,    60,    2,
-     636,   244,   159,    92,    74,    68,    12,
-     327,   119,
-  },
-  {  400,   121,    47,    51,    11,    2,    1,   366,    3,
-     1109,  1102,   620,   622,
-     786,   624,   450,   331,   459,   490,   366,   163,    29,    1,
-     673,   257,   175,    98,    77,    63,    14,
-     344,   131,
-  },
-  {  470,   151,    59,    67,    15,    3,    1,   354,    4,
-     1198,  1189,   640,   643,
-     769,   603,   410,   294,   386,   381,   240,    78,    5,    1,
-     746,   282,   205,   113,    87,    64,    15,
-     368,   145,
-  },
-  {  553,   189,    77,    94,    24,    6,    1,   347,    4,
-     1244,  1232,   650,   653,
-     739,   551,   360,   249,   303,   261,   129,    24,    1,    1,
-     828,   313,   245,   135,   108,    77,    17,
-     403,   169,
-  },
-  {  701,   253,   109,   140,    42,    12,    2,   350,    6,
-     1210,  1197,   652,   647,
-     673,   495,   299,   189,   211,   151,    50,    2,    1,    1,
-     892,   336,   284,   162,   134,   101,    25,
-     455,   205,
-  },
-  {  924,   390,   180,   248,    85,    31,    13,   286,    14,
-     1242,  1206,   601,   577,
-     519,   342,   175,   100,    85,    36,    1,    1,    1,    1,
-     1031,   348,   346,   204,   166,   131,    34,
-     473,   197,
-  },
-  /* AC Inter bias group 1 tables */
-  {  459,   128,    50,    48,    8,    1,    1,   224,    69,
-     1285,  1227,   587,   565,
-     573,   406,   261,   180,   228,   213,   130,    47,    11,    3,
-     1069,   540,   309,   231,   147,   279,   157,
-     383,   165,
-  },
-  {  524,   155,    62,    64,    14,    2,    1,   209,    63,
-     1345,  1288,   523,   507,
-     515,   358,   225,   153,   183,   160,    87,    29,    7,    2,
-     1151,   591,   365,   282,   179,   308,   133,
-     344,   157,
-  },
-  {  588,   181,    75,    81,    19,    3,    1,   204,    68,
-     1344,  1288,   517,   503,
-     505,   346,   216,   141,   169,   139,    71,    21,    5,    1,
-     1146,   584,   366,   286,   170,   298,   153,
-     342,   157,
-  },
-  {  634,   196,    82,    89,    22,    4,    1,   194,    60,
-     1356,  1312,   515,   502,
-     489,   331,   199,   127,   145,   111,    51,    14,    3,    1,
-     1156,   589,   393,   300,   182,   285,   144,
-     340,   159,
-  },
-  {  715,   231,    98,   113,    31,    7,    1,   181,    57,
-     1345,  1303,   498,   490,
-     448,   291,   166,   101,   106,    75,    30,    9,    1,    1,
-     1175,   584,   416,   321,   209,   333,   164,
-     330,   159,
-  },
-  {  825,   283,   125,   149,    44,    11,    2,   160,    59,
-     1343,  1308,   476,   469,
-     405,   247,   131,    75,    76,    47,    18,    5,    1,    1,
-     1192,   579,   432,   332,   217,   327,   176,
-     320,   154,
-  },
-  {  961,   361,   170,   215,    70,    20,    5,   161,    55,
-     1250,  1218,   463,   460,
-     354,   204,   101,    52,    48,    28,    11,    1,    1,    1,
-     1172,   570,   449,   350,   222,   332,   169,
-     338,   174,
-  },
-  {  1139,   506,   266,   387,   156,    57,    26,   114,    48,
-     1192,  1170,   366,   366,
-     226,   113,    47,    22,    22,    12,    1,    1,    1,    1,
-     1222,   551,   462,   391,   220,   322,   156,
-     290,   136,
-  },
-
-  /* AC Intra bias group 2 tables */
-  {  245,    49,    15,    11,    1,    1,    1,   332,    38,
-     1163,  1162,   685,   683,
-     813,   623,   437,   318,   421,   424,   288,   109,    14,    1,
-     729,   303,   179,   112,    87,   199,    46,
-     364,   135,
-  },
-  {  305,    67,    22,    17,    2,    1,    1,   329,    39,
-     1250,  1245,   706,   705,
-     801,   584,   385,   267,   330,   296,   165,    40,    3,    1,
-     798,   340,   206,   131,   108,   258,    52,
-     382,   154,
-  },
-  {  356,    82,    28,    23,    3,    1,    1,   312,    42,
-     1340,  1334,   701,   703,
-     770,   545,   346,   227,   269,   223,   100,    17,    1,    1,
-     846,   359,   222,   142,   120,   284,    55,
-     379,   157,
-  },
-  {  402,    95,    33,    30,    4,    1,    1,   300,    43,
-     1379,  1371,   710,   714,
-     724,   486,   289,   182,   202,   144,    47,    5,    1,    1,
-     908,   394,   250,   161,   141,   350,    60,
-     391,   171,
-  },
-  {  499,   122,    44,    42,    7,    1,    1,   267,    45,
-     1439,  1436,   690,   694,
-     628,   385,   213,   122,   117,    62,    14,    1,    1,    1,
-     992,   441,   288,   187,   167,   446,    82,
-     378,   176,
-  },
-  {  641,   168,    62,    60,    12,    1,    1,   247,    49,
-     1435,  1436,   662,   669,
-     527,   298,   142,    71,    55,    22,    3,    1,    1,    1,
-     1036,   470,   319,   208,   193,   548,   106,
-     362,   184,
-  },
-  {  860,   274,   111,   113,    23,    4,    1,   229,    59,
-     1331,  1323,   629,   645,
-     419,   192,    72,    30,    19,    6,    1,    1,    1,    1,
-     1022,   478,   339,   225,   213,   690,   142,
-     342,   198,
-  },
-  {  1059,   437,   218,   285,    84,    17,    2,   152,    44,
-     1284,  1313,   530,   561,
-     212,    66,    17,    6,    3,    1,    1,    1,    1,    1,
-     1034,   485,   346,   226,   207,   819,   185,
-     248,   145,
-  },
-  /* AC Inter bias group 2 tables */
-  {  407,    93,    31,    24,    2,    1,    1,   232,   108,
-     1365,  1349,   581,   578,
-     498,   305,   170,   100,   103,    67,    24,    5,    1,    1,
-     1175,   604,   393,   268,   209,   506,   217,
-     379,   193,
-  },
-  {  521,   129,    46,    39,    4,    1,    1,   199,   116,
-     1419,  1403,   543,   540,
-     446,   263,   138,    78,    75,    44,    13,    2,    1,    1,
-     1201,   605,   392,   267,   214,   533,   252,
-     334,   167,
-  },
-  {  575,   144,    52,    46,    6,    1,    1,   193,   124,
-     1394,  1384,   528,   528,
-     406,   227,   112,    59,    54,    28,    7,    1,    1,    1,
-     1210,   621,   412,   284,   235,   604,   265,
-     320,   167,
-  },
-  {  673,   174,    64,    59,    9,    1,    1,   177,   128,
-     1392,  1385,   499,   499,
-     352,   183,    85,    42,    35,    16,    3,    1,    1,    1,
-     1210,   626,   418,   289,   246,   675,   297,
-     292,   158,
-  },
-  {  804,   225,    85,    77,    12,    1,    1,   150,   129,
-     1387,  1384,   455,   455,
-     277,   129,    53,    23,    17,    7,    1,    1,    1,    1,
-     1212,   635,   433,   306,   268,   760,   313,
-     249,   137,
-  },
-  {  975,   305,   123,   117,    20,    2,    1,   135,   140,
-     1312,  1310,   401,   399,
-     201,    80,    28,    11,    8,    2,    1,    1,    1,    1,
-     1162,   623,   439,   314,   283,   906,   368,
-     203,   121,
-  },
-  {  1205,   452,   208,   231,    50,    6,    1,   123,   149,
-     1161,  1164,   370,   370,
-     137,    45,    14,    4,    2,    1,    1,    1,    1,    1,
-     1047,   562,   413,   300,   277,  1020,   404,
-     168,   105,
-  },
-  {  1297,   662,   389,   574,   200,    39,    4,    55,   120,
-     1069,  1076,   273,   265,
-     66,    14,    2,    1,    1,    1,    1,    1,    1,    1,
-     930,   475,   345,   249,   236,  1124,   376,
-     91,    56,
-  },
-
-  /* AC Intra bias group 3 tables */
-  {  278,    55,    17,    12,    1,    1,    1,   288,    71,
-     1315,  1304,   725,   724,
-     733,   506,   307,   195,   225,   175,    77,    12,    1,    1,
-     904,   414,   246,   170,   126,   290,   205,
-     423,   185,
-  },
-  {  382,    80,    26,    21,    2,    1,    1,   239,    64,
-     1442,  1429,   706,   701,
-     664,   420,   239,   146,   152,   105,    34,    2,    1,    1,
-     975,   440,   263,   185,   140,   332,   229,
-     397,   169,
-  },
-  {  451,    97,    32,    27,    4,    1,    1,   223,    75,
-     1462,  1454,   682,   680,
-     574,   343,   179,   101,    98,    54,    9,    1,    1,    1,
-     1031,   482,   293,   210,   163,   400,   297,
-     384,   181,
-  },
-  {  551,   128,    43,    37,    5,    1,    1,   201,    78,
-     1497,  1487,   642,   651,
-     493,   269,   133,    70,    60,    24,    2,    1,    1,    1,
-     1065,   504,   312,   228,   178,   451,   352,
-     351,   174,
-  },
-  {  693,   179,    63,    54,    8,    1,    1,   169,    78,
-     1502,  1497,   580,   591,
-     375,   186,    77,    35,    21,    4,    1,    1,    1,    1,
-     1099,   533,   341,   253,   206,   542,   432,
-     306,   164,
-  },
-  {  867,   263,   105,    96,    16,    2,    1,   152,    81,
-     1435,  1439,   521,   525,
-     270,   107,    32,    8,    3,    1,    1,    1,    1,    1,
-     1085,   537,   361,   277,   223,   616,   549,
-     258,   156,
-  },
-  {  1022,   385,   182,   207,    46,    7,    1,   158,    88,
-     1290,  1318,   501,   502,
-     184,    38,    6,    1,    1,    1,    1,    1,    1,    1,
-     1023,   480,   345,   301,   232,   665,   661,
-     210,   133,
-  },
-  {  1184,   555,   307,   457,   185,    44,    6,   115,    41,
-     1236,  1253,   329,   340,
-     32,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-     1017,   385,   316,   370,   246,   672,   788,
-     85,    23,
-  },
-  /* AC Inter bias group 3 tables */
-  {  502,   106,    33,    22,    1,    1,    1,   151,   132,
-     1446,  1451,   502,   499,
-     343,   181,    84,    42,    36,    16,    3,    1,    1,    1,
-     1211,   661,   429,   312,   242,   637,   498,
-     288,   156,
-  },
-  {  651,   147,    48,    35,    3,    1,    1,   145,   140,
-     1419,  1420,   469,   466,
-     281,   132,    56,    25,    18,    6,    1,    1,    1,    1,
-     1175,   656,   435,   328,   260,   715,   556,
-     252,   147,
-  },
-  {  749,   179,    59,    43,    4,    1,    1,   123,   135,
-     1423,  1431,   413,   409,
-     221,    95,    36,    15,    9,    2,    1,    1,    1,    1,
-     1159,   658,   444,   340,   272,   782,   656,
-     205,   124,
-  },
-  {  902,   243,    86,    67,    7,    1,    1,   114,   141,
-     1385,  1385,   387,   383,
-     178,    67,    22,    7,    4,    1,    1,    1,    1,    1,
-     1096,   632,   434,   339,   277,   813,   735,
-     171,   109,
-  },
-  {  1081,   337,   133,   112,    15,    1,    1,    92,   137,
-     1350,  1349,   311,   309,
-     115,    34,    8,    2,    1,    1,    1,    1,    1,    1,
-     1016,   595,   418,   342,   283,   870,   883,
-     114,    78,
-  },
-  {  1253,   467,   210,   205,    34,    3,    1,    80,   130,
-     1318,  1313,   258,   260,
-     68,    12,    2,    1,    1,    1,    1,    1,    1,    1,
-     874,   516,   378,   330,   273,   877,  1000,
-     72,    53,
-  },
-  {  1362,   626,   333,   423,   100,    10,    1,    73,   106,
-     1311,  1313,   241,   231,
-     31,    3,    1,    1,    1,    1,    1,    1,    1,    1,
-     620,   368,   286,   302,   245,   814,  1127,
-     34,    28,
-  },
-  {  1203,   743,   460,   774,   284,    36,    1,    13,    25,
-     1956,  1961,   103,   106,
-     3,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-     248,   131,   149,   272,   165,   535,   813,
-     3,    3,
-  },
-
-  /* AC Intra bias group 4 tables */
-  {  599,   150,    55,    50,    9,    1,    1,   181,    19,
-     1487,  1487,   625,   625,
-     473,   271,   138,    74,    71,    42,    11,    1,    1,    1,
-     1187,   591,   356,   239,   170,   351,   137,
-     395,   194,
-  },
-  {  758,   209,    79,    74,    15,    2,    1,   147,    25,
-     1514,  1514,   521,   520,
-     334,   165,    74,    36,    30,    11,    1,    1,    1,    1,
-     1252,   644,   409,   279,   211,   472,   203,
-     318,   171,
-  },
-  {  852,   252,   100,    98,    20,    3,    1,   130,    26,
-     1493,  1498,   481,   473,
-     268,   123,    51,    23,    15,    3,    1,    1,    1,    1,
-     1256,   652,   426,   294,   231,   543,   242,
-     278,   156,
-  },
-  {  971,   309,   130,   136,    30,    5,    1,   113,    28,
-     1458,  1467,   443,   435,
-     215,    90,    31,    12,    5,    1,    1,    1,    1,    1,
-     1232,   643,   426,   303,   243,   590,   300,
-     235,   136,
-  },
-  {  1100,   399,   180,   206,    53,    9,    1,   101,    29,
-     1419,  1425,   375,   374,
-     158,    47,    10,    1,    1,    1,    1,    1,    1,    1,
-     1193,   609,   426,   319,   256,   643,   383,
-     166,   103,
-  },
-  {  1195,   505,   249,   326,    98,    20,    3,   102,    25,
-     1370,  1356,   355,   347,
-     104,    11,    1,    1,    1,    1,    1,    1,    1,    1,
-     1100,   568,   381,   330,   261,   642,   466,
-     105,    69,
-  },
-  {  1176,   608,   345,   559,   244,    57,    6,   110,    9,
-     1370,  1332,   372,   367,
-     29,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-     859,   427,   269,   359,   375,   608,   451,
-     35,    20,
-  },
-  {  1140,   613,   391,   797,   458,   180,    37,    2,    1,
-     2037,  1697,    95,    31,
-     1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-     360,    49,    23,   198,  1001,   719,   160,
-     1,    1,
-  },
-  /* AC Inter bias group 4 tables */
-  {  931,   272,   105,    96,    16,    1,    1,    91,    52,
-     1481,  1489,   347,   349,
-     174,    74,    28,    12,    8,    3,    1,    1,    1,    1,
-     1247,   719,   490,   356,   279,   706,   363,
-     187,   110,
-  },
-  {  1095,   358,   148,   143,    25,    3,    1,    74,    61,
-     1439,  1457,   304,   302,
-     127,    46,    15,    5,    3,    1,    1,    1,    1,    1,
-     1138,   664,   469,   347,   282,   768,   487,
-     139,    87,
-  },
-  {  1192,   423,   188,   189,    36,    4,    1,    64,    61,
-     1457,  1475,   284,   282,
-     106,    35,    10,    3,    1,    1,    1,    1,    1,    1,
-     1078,   624,   440,   329,   264,   744,   507,
-     117,    73,
-  },
-  {  1275,   496,   231,   258,    52,    6,    1,    53,    55,
-     1458,  1470,   248,   245,
-     77,    20,    5,    1,    1,    1,    1,    1,    1,    1,
-     984,   576,   414,   323,   260,   771,   569,
-     84,    54,
-  },
-  {  1377,   603,   302,   367,    87,    11,    1,    37,    52,
-     1522,  1532,   207,   204,
-     47,    8,    1,    1,    1,    1,    1,    1,    1,    1,
-     840,   493,   366,   291,   231,   690,   636,
-     52,    32,
-  },
-  {  1409,   708,   385,   529,   148,    24,    1,    23,    37,
-     1672,  1670,   163,   162,
-     22,    2,    1,    1,    1,    1,    1,    1,    1,    1,
-     647,   364,   291,   262,   210,   574,   643,
-     26,    14,
-  },
-  {  1348,   778,   481,   755,   245,    53,    4,    13,    19,
-     2114,  2089,   141,   139,
-     7,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-     302,   183,   162,   181,   182,   344,   437,
-     8,    3,
-  },
-  {  1560,   769,   410,   664,   243,    58,    1,    1,    1,
-     3017,  2788,    17,    24,
-     3,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-     34,    16,    8,    55,   134,   105,    86,
-     1,    1,
-  },
-};
-
-#else /* Frequency tables for encoder version < 2 */
-
-const ogg_uint32_t FrequencyCounts_VP3[NUM_HUFF_TABLES][MAX_ENTROPY_TOKENS] = {
-  /* DC Intra bias */
-  {  198,    62,    22,    31,    14,     6,     6,   205,     3,
-     843,   843,   415,   516,
-     660,   509,   412,   347,   560,   779,   941,   930,   661,   377,
-     170,   155,    39,     2,     9,    15,    11,
-     128,    86,
-  },
-  {  299,    92,    34,    39,    15,     6,     6,   132,     1,
-     851,   851,   484,   485,
-     666,   514,   416,   351,   567,   788,   953,   943,   670,   383,
-     117,   119,    26,     4,    17,     7,     1,
-      93,    56,
-  },
-  {  367,   115,   42,   47,   16,    6,    6,   105,    1,
-     896,   896,   492,   493,
-     667,   510,   408,   342,   547,   760,   932,   927,   656,   379,
-     114,   103,   10,    3,    6,    2,    1,
-     88,   49,
-  },
-  {  462,   158,   63,   76,   28,    9,    8,   145,    1,
-     1140,  1140,   573,   574,
-     754,   562,   435,   357,   555,   742,   793,   588,   274,   81,
-     154,   117,   13,    6,   12,    2,    1,
-     104,   62,
-  },
-  {  558,   196,   81,   99,   36,   11,    9,   135,    1,
-     1300,  1301,   606,   607,
-     779,   560,   429,   349,   536,   680,   644,   405,   153,   30,
-     171,   120,   12,    5,   14,    3,    1,
-     104,   53,
-  },
-  {  635,   233,   100,   122,   46,   14,   12,   113,    1,
-     1414,  1415,   631,   631,
-     785,   555,   432,   335,   513,   611,   521,   284,   89,   13,
-     170,   113,   10,    5,   14,    3,    1,
-     102,   62,
-  },
-  {  720,   276,   119,   154,   62,   20,   16,   101,    1,
-     1583,  1583,   661,   661,
-     794,   556,   407,   318,   447,   472,   343,   153,   35,    1,
-     172,   115,   11,    7,   14,    3,    1,
-     112,   70,
-  },
-  {  853,   326,   144,   184,   80,   27,   19,   52,    1,
-     1739,  1740,   684,   685,
-     800,   540,   381,   277,   364,   352,   218,   78,   13,    1,
-     139,   109,    9,    6,   20,    2,    1,
-     94,   50,
-  },
-
-  /* DC Inter Bias */
-  {  490,   154,   57,   53,   10,    2,    1,   238,   160,
-     1391,  1390,   579,   578,
-     491,   273,   172,   118,   152,   156,   127,   79,   41,   39,
-     712,   547,   316,   125,   183,   306,   237,
-     451,   358,
-  },
-  {  566,   184,   70,   65,   11,    2,    1,   235,   51,
-     1414,  1414,   599,   598,
-     510,   285,   180,   124,   157,   161,   131,   82,   42,   40,
-     738,   551,   322,   138,   195,   188,   93,
-     473,   365,
-  },
-  {  711,   261,   111,   126,   27,    4,    1,   137,   52,
-     1506,  1505,   645,   645,
-     567,   316,   199,   136,   172,   175,   142,   88,   45,   48,
-     548,   449,   255,   145,   184,   174,   121,
-     260,   227,
-  },
-  {  823,   319,   144,   175,   43,    7,    1,   53,   42,
-     1648,  1648,   653,   652,
-     583,   329,   205,   139,   175,   176,   139,   84,   44,   34,
-     467,   389,   211,   137,   181,   186,   107,
-     106,   85,
-  },
-  {  948,   411,   201,   276,   85,   16,    2,   39,   33,
-     1778,  1777,   584,   583,
-     489,   265,   162,   111,   140,   140,   108,   64,   38,   23,
-     428,   356,   201,   139,   186,   165,   94,
-     78,   63,
-  },
-  {  1002,   470,   248,   386,   153,   39,    6,   23,   23,
-     1866,  1866,   573,   573,
-     467,   249,   155,   103,   130,   128,   94,   60,   38,   14,
-     323,   263,   159,   111,   156,   153,   74,
-     46,   34,
-  },
-  {  1020,   518,   291,   504,   242,   78,   18,   14,   14,
-     1980,  1979,   527,   526,
-     408,   219,   132,   87,   110,   104,   79,   55,   31,    7,
-     265,   213,   129,   91,   131,   111,   50,
-     31,   20,
-  },
-  {  1018,   544,   320,   591,   338,   139,   47,    5,    2,
-     2123,  2123,   548,   547,
-     414,   212,   126,   83,   101,   96,   79,   60,   23,    1,
-     120,   97,   55,   39,   60,   38,   15,
-     11,    8,
-  },
-
-  /* AC INTRA Tables  */
-  /* AC Intra bias group 1 tables */
-  {  242,   62,   22,   20,    4,    1,    1,   438,    1,
-     593,   593,   489,   490,
-     657,   580,   471,   374,   599,   783,   869,   770,   491,   279,
-     358,   144,   82,   54,   49,   70,    5,
-     289,   107,
-  },
-  {  317,   95,   38,   41,    8,    1,    1,   479,    1,
-     653,   654,   500,   501,
-     682,   611,   473,   376,   582,   762,   806,   656,   358,   155,
-     419,   162,   86,   58,   36,   34,    1,
-     315,   126,
-  },
-  {  382,   121,   49,   59,   15,    3,    1,   496,    1,
-     674,   674,   553,   554,
-     755,   636,   487,   391,   576,   718,   701,   488,   221,   72,
-     448,   161,   107,   56,   37,   29,    1,
-     362,   156,
-  },
-  {  415,   138,   57,   73,   21,    5,    1,   528,    1,
-     742,   741,   562,   563,
-     753,   669,   492,   388,   563,   664,   589,   340,   129,   26,
-     496,   184,   139,   71,   48,   33,    2,
-     387,   166,
-  },
-  {  496,   170,   73,   94,   31,    8,    2,   513,    1,
-     855,   855,   604,   604,
-     769,   662,   477,   356,   486,   526,   381,   183,   51,    5,
-     590,   214,   160,   85,   60,   39,    3,
-     427,   203,
-  },
-  {  589,   207,   89,   116,   40,   13,    3,   491,    1,
-     919,   919,   631,   631,
-     769,   633,   432,   308,   408,   378,   247,   94,   17,    1,
-     659,   247,   201,   105,   73,   51,    3,
-     466,   242,
-  },
-  {  727,   266,   115,   151,   49,   17,    6,   439,    1,
-     977,   977,   642,   642,
-     718,   572,   379,   243,   285,   251,   133,   40,    1,    1,
-     756,   287,   253,   126,   94,   66,    4,
-     492,   280,
-  },
-  {  940,   392,   180,   247,   82,   30,   14,   343,    1,
-     1064,  1064,   615,   616,
-     596,   414,   235,   146,   149,   108,   41,    1,    1,    1,
-     882,   314,   346,   172,   125,   83,    6,
-     489,   291,
-  },
-  /* AC Inter bias group 1 tables */
-  {  440,   102,   33,   23,    2,    1,    1,   465,   85,
-     852,   852,   744,   743,
-     701,   496,   297,   193,   225,   200,   129,   58,   18,    2,
-     798,   450,   269,   202,   145,   308,   154,
-     646,   389,
-  },
-  {  592,   151,   53,   43,    6,    1,    1,   409,   34,
-     875,   875,   748,   747,
-     723,   510,   305,   196,   229,   201,   130,   59,   18,    2,
-     800,   436,   253,   185,   115,   194,   88,
-     642,   368,
-  },
-  {  759,   222,   86,   85,   17,    2,    1,   376,   46,
-     888,   888,   689,   688,
-     578,   408,   228,   143,   165,   141,   84,   35,    7,    1,
-     878,   488,   321,   244,   147,   266,   124,
-     612,   367,
-  },
-  {  912,   298,   122,   133,   34,    7,    1,   261,   44,
-     1092,  1091,   496,   496,
-     409,   269,   150,   95,   106,   87,   49,   16,    1,    1,
-     1102,   602,   428,   335,   193,   323,   157,
-     423,   253,
-  },
-  {  1072,   400,   180,   210,   60,   16,    3,   210,   40,
-     1063,  1063,   451,   451,
-     345,   221,   121,   73,   79,   64,   31,    6,    1,    1,
-     1105,   608,   462,   358,   202,   330,   155,
-     377,   228,
-  },
-  {  1164,   503,   254,   330,   109,   34,    9,   167,   35,
-     1038,  1037,   390,   390,
-     278,   170,   89,   54,   56,   40,   13,    1,    1,    1,
-     1110,   607,   492,   401,   218,   343,   141,
-     323,   192,
-  },
-  {  1173,   583,   321,   486,   196,   68,   23,   124,   23,
-     1037,  1037,   347,   346,
-     232,   139,   69,   40,   37,   20,    2,    1,    1,    1,
-     1128,   584,   506,   410,   199,   301,   113,
-     283,   159,
-  },
-  {  1023,   591,   366,   699,   441,   228,   113,   79,    5,
-     1056,  1056,   291,   291,
-     173,   96,   38,   19,    8,    1,    1,    1,    1,    1,
-     1187,   527,   498,   409,   147,   210,   56,
-     263,   117,
-  },
-
-  /* AC Intra bias group 2 tables */
-  {  311,   74,   27,   27,    5,    1,    1,   470,   24,
-     665,   667,   637,   638,
-     806,   687,   524,   402,   585,   679,   609,   364,   127,   20,
-     448,   210,   131,   76,   52,   111,   19,
-     393,   195,
-  },
-  {  416,   104,   39,   38,    8,    1,    1,   545,   33,
-     730,   731,   692,   692,
-     866,   705,   501,   365,   495,   512,   387,   168,   39,    2,
-     517,   240,   154,   86,   64,   127,   19,
-     461,   247,
-  },
-  {  474,   117,   43,   42,    9,    1,    1,   560,   40,
-     783,   783,   759,   760,
-     883,   698,   466,   318,   404,   377,   215,   66,    7,    1,
-     559,   259,   176,   110,   87,   170,   22,
-     520,   278,
-  },
-  {  582,   149,   53,   53,   12,    2,    1,   473,   39,
-     992,   993,   712,   713,
-     792,   593,   373,   257,   299,   237,   114,   25,    1,    1,
-     710,   329,   221,   143,   116,   226,   26,
-     490,   259,
-  },
-  {  744,   210,   78,   77,   16,    2,    1,   417,   37,
-     1034,  1035,   728,   728,
-     718,   509,   296,   175,   184,   122,   42,    3,    1,    1,
-     791,   363,   255,   168,   145,   311,   35,
-     492,   272,
-  },
-  {  913,   291,   121,   128,   28,    4,    1,   334,   40,
-     1083,  1084,   711,   712,
-     624,   378,   191,   107,   95,   50,    7,    1,    1,    1,
-     876,   414,   288,   180,   164,   382,   39,
-     469,   275,
-  },
-  {  1065,   405,   184,   216,   53,    8,    1,   236,   36,
-     1134,  1134,   685,   686,
-     465,   253,   113,   48,   41,    9,    1,    1,    1,    1,
-     965,   451,   309,   179,   166,   429,   53,
-     414,   249,
-  },
-  {  1148,   548,   301,   438,   160,   42,    6,   84,   17,
-     1222,  1223,   574,   575,
-     272,   111,   23,    6,    2,    1,    1,    1,    1,    1,
-     1060,   502,   328,   159,   144,   501,   54,
-     302,   183,
-  },
-  /* AC Inter bias group 2 tables */
-  {  403,   80,   24,   17,    1,    1,    1,   480,   90,
-     899,   899,   820,   819,
-     667,   413,   228,   133,   139,   98,   42,   10,    1,    1,
-     865,   470,   316,   222,   171,   419,   213,
-     645,   400,
-  },
-  {  698,   169,   59,   49,    6,    1,    1,   414,   101,
-     894,   893,   761,   761,
-     561,   338,   171,   96,   97,   64,   26,    6,    1,    1,
-     896,   494,   343,   239,   192,   493,   215,
-     583,   366,
-  },
-  {  914,   255,   94,   80,   10,    1,    1,   345,   128,
-     935,   935,   670,   671,
-     415,   222,   105,   55,   51,   30,   10,    1,    1,    1,
-     954,   530,   377,   274,   232,   641,   295,
-     456,   298,
-  },
-  {  1103,   359,   146,   135,   20,    1,    1,   235,   119,
-     1042,  1042,   508,   507,
-     293,   146,   65,   33,   30,   16,    4,    1,    1,    1,
-     1031,   561,   407,   296,   265,   813,   317,
-     301,   192,
-  },
-  {  1255,   504,   238,   265,   51,    5,    1,   185,   113,
-     1013,  1013,   437,   438,
-     212,   92,   41,   18,   15,    6,    1,    1,    1,    1,
-     976,   530,   386,   276,   260,   927,   357,
-     224,   148,
-  },
-  {  1292,   610,   332,   460,   127,   16,    1,   136,   99,
-     1014,  1015,   384,   384,
-     153,   65,   25,   11,    6,    1,    1,    1,    1,    1,
-     942,   487,   343,   241,   238,   970,   358,
-     174,   103,
-  },
-  {  1219,   655,   407,   700,   280,   55,    2,   100,   60,
-     1029,  1029,   337,   336,
-     119,   43,   11,    3,    2,    1,    1,    1,    1,    1,
-     894,   448,   305,   199,   213,  1005,   320,
-     136,   77,
-  },
-  {  1099,   675,   435,   971,   581,   168,   12,   37,   16,
-     1181,  1081,   319,   318,
-     66,   11,    6,    1,    1,    1,    1,    1,    1,    1,
-     914,   370,   235,   138,   145,   949,   128,
-     94,   41,
-  },
-
-  /* AC Intra bias group 3 tables */
-  {  486,   112,   39,   34,    6,    1,    1,   541,   67,
-     819,   818,   762,   763,
-     813,   643,   403,   280,   332,   295,   164,   53,    6,    1,
-     632,   294,   180,   131,   105,   208,   109,
-     594,   295,
-  },
-  {  723,   191,   69,   65,   12,    1,    1,   445,   79,
-     865,   865,   816,   816,
-     750,   515,   290,   172,   184,   122,   46,    5,    1,    1,
-     740,   340,   213,   165,   129,   270,   168,
-     603,   326,
-  },
-  {  884,   264,   102,   103,   21,    3,    1,   382,   68,
-     897,   897,   836,   836,
-     684,   427,   227,   119,   119,   70,   16,    1,    1,    1,
-     771,   367,   234,   184,   143,   272,   178,
-     555,   326,
-  },
-  {  1028,   347,   153,   161,   36,    8,    1,   251,   44,
-     1083,  1084,   735,   735,
-     541,   289,   144,   77,   57,   23,    3,    1,    1,    1,
-     926,   422,   270,   215,   176,   301,   183,
-     443,   248,
-  },
-  {  1155,   465,   224,   264,   71,   14,    3,   174,   27,
-     1110,  1111,   730,   731,
-     429,   206,   79,   30,   19,    4,    1,    1,    1,    1,
-     929,   443,   279,   225,   194,   298,   196,
-     354,   223,
-  },
-  {  1191,   576,   296,   415,   144,   36,    8,   114,   16,
-     1162,  1162,   749,   749,
-     338,   108,   29,    8,    5,    1,    1,    1,    1,    1,
-     947,   458,   273,   207,   194,   248,   145,
-     258,   152,
-  },
-  {  1169,   619,   366,   603,   247,   92,   23,   46,    1,
-     1236,  1236,   774,   775,
-     191,   35,   14,    1,    1,    1,    1,    1,    1,    1,
-     913,   449,   260,   214,   194,   180,   82,
-     174,   98,
-  },
-  {  1006,   537,   381,   897,   504,   266,   101,   39,    1,
-     1307,  1307,   668,   667,
-     116,    3,    1,    1,    1,    1,    1,    1,    1,    1,
-     1175,   261,   295,   70,   164,   107,   31,
-     10,   76,
-  },
-  /* AC Inter bias group 3 tables */
-  {  652,   156,   53,   43,    5,    1,    1,   368,   128,
-     983,   984,   825,   825,
-     583,   331,   163,   88,   84,   48,   15,    1,    1,    1,
-     870,   480,   316,   228,   179,   421,   244,
-     562,   349,
-  },
-  {  988,   280,   104,   87,   12,    1,    1,   282,   194,
-     980,   981,   738,   739,
-     395,   189,   80,   37,   31,   12,    2,    1,    1,    1,
-     862,   489,   333,   262,   214,   600,   446,
-     390,   260,
-  },
-  {  1176,   399,   165,   154,   24,    2,    1,   218,   224,
-     1017,  1018,   651,   651,
-     280,   111,   42,   16,    9,    3,    1,    1,    1,    1,
-     787,   469,   324,   269,   229,   686,   603,
-     267,   194,
-  },
-  {  1319,   530,   255,   268,   47,    4,    1,   113,   183,
-     1149,  1150,   461,   461,
-     173,   58,   17,    5,    3,    1,    1,    1,    1,    1,
-     768,   450,   305,   261,   221,   716,   835,
-     136,   97,
-  },
-  {  1362,   669,   355,   465,   104,    9,    1,   76,   153,
-     1253,  1253,   398,   397,
-     102,   21,    5,    1,    1,    1,    1,    1,    1,    1,
-     596,   371,   238,   228,   196,   660,   954,
-     68,   53,
-  },
-  {  1354,   741,   446,   702,   174,   15,    1,   38,   87,
-     1498,  1498,   294,   294,
-     43,    7,    1,    1,    1,    1,    1,    1,    1,    1,
-     381,   283,   165,   181,   155,   544,  1039,
-     25,   21,
-  },
-  {  1262,   885,   546,   947,   263,   18,    1,   18,   27,
-     1908,  1908,   163,   162,
-     14,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-     195,   152,   83,   125,   109,   361,   827,
-     7,    5,
-  },
-  {  2539,   951,   369,   554,   212,   18,    1,    1,    1,
-     2290,  2289,   64,   64,
-     1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-     18,   18,    9,   55,   36,   184,   323,
-     1,    1,
-  },
-
-
-  /* AC Intra bias group 4 tables */
-  {  921,   264,   101,   100,   19,    2,    1,   331,   98,
-     1015,  1016,   799,   799,
-     512,   269,   119,   60,   50,   17,    1,    1,    1,    1,
-     841,   442,   307,   222,   182,   493,   256,
-     438,   310,
-  },
-  {  1147,   412,   184,   206,   50,    6,    1,   242,   141,
-     977,   976,   808,   807,
-     377,   135,   40,   10,    7,    1,    1,    1,    1,    1,
-     788,   402,   308,   223,   205,   584,   406,
-     316,   227,
-  },
-  {  1243,   504,   238,   310,   79,   11,    1,   184,   150,
-     983,   984,   814,   813,
-     285,   56,   10,    1,    1,    1,    1,    1,    1,    1,
-     713,   377,   287,   217,   180,   615,   558,
-     208,   164,
-  },
-  {  1266,   606,   329,   484,   161,   27,    1,   79,   92,
-     1187,  1188,   589,   588,
-     103,   10,    1,    1,    1,    1,    1,    1,    1,    1,
-     680,   371,   278,   221,   244,   614,   728,
-     80,   62,
-  },
-  {  1126,   828,   435,   705,   443,   90,    8,   10,   55,
-     1220,  1219,   350,   350,
-     28,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-     602,   330,   222,   168,   158,   612,   919,
-     104,    5,
-  },
-  {  1210,   506,  1014,   926,   474,   240,    4,    1,    44,
-     1801,  1801,   171,   171,
-     1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-     900,   132,    36,    11,    47,   191,   316,
-     2,    1,
-  },
-  {  1210,   506,  1014,   926,   474,   240,    4,    1,    44,
-     1801,  1801,   171,   171,
-     1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-     900,   132,    36,    11,    47,   191,   316,
-     2,    1,
-  },
-  {  1210,   506,  1014,   926,   474,   240,    4,    1,    44,
-     1801,  1801,   171,   171,
-     1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-     900,   132,    36,    11,    47,   191,   316,
-     2,    1,
-  },
-  /* AC Inter bias group 4 tables */
-  {  1064,   325,   129,   117,    20,    2,    1,   266,   121,
-     1000,  1000,   706,   706,
-     348,   162,    67,    32,    25,    11,    1,    1,    1,    1,
-     876,   513,   363,   274,   225,   627,   384,
-     370,   251,
-  },
-  {  1311,   517,   238,   254,    45,    3,    1,   188,   160,
-     1070,  1070,   635,   635,
-     239,    85,    30,    11,    6,    1,    1,    1,    1,    1,
-     744,   420,   313,   239,   206,   649,   541,
-     221,   155,
-  },
-  {  1394,   632,   322,   385,    78,    7,    1,   134,   152,
-     1163,  1164,   607,   607,
-     185,    51,    12,    3,    1,    1,    1,    1,    1,    1,
-     631,   331,   275,   203,   182,   604,   620,
-     146,    98,
-  },
-  {  1410,   727,   407,   546,   146,    19,    1,    67,    88,
-     1485,  1486,   419,   418,
-     103,    18,    3,    1,    1,    1,    1,    1,    1,    1,
-     555,   261,   234,   164,   148,   522,   654,
-      67,    39,
-  },
-  {  1423,   822,   492,   719,   216,    22,    1,    28,    59,
-     1793,  1793,   323,   324,
-     37,    2,    1,    1,    1,    1,    1,    1,    1,    1,
-     376,   138,   158,   102,   119,   400,   604,
-     28,    9,
-  },
-  {  1585,   923,   563,   918,   207,    25,    1,    5,    20,
-     2229,  2230,   172,   172,
-     7,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-     191,    40,    56,    22,    65,   243,   312,
-     2,    1,
-  },
-  {  2225,  1100,   408,   608,   133,    8,    1,    1,    1,
-     2658,  2658,    25,    24,
-     1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-     8,    1,    1,    1,    1,   125,    16,
-     1,    1,
-  },
-  {  2539,   951,   369,   554,   212,    18,    1,    1,    1,
-     2290,  2289,    64,    64,
-     1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-     18,    18,    9,    55,    36,   184,   323,
-     1,    1,
-  },
-};
-
-#endif /* NEW_FREQS */

+ 0 - 767
Engine/lib/libtheora/lib/enc/mcomp.c

@@ -1,767 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: mcomp.c 15153 2008-08-04 18:37:55Z tterribe $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include <stdio.h>
-#include "codec_internal.h"
-
-/* Builds the lookup tables used by the motion search: the nine half-pixel
-   neighbour offsets and, for every search step, the offsets of the eight
-   sites examined around the current best position.  Also counts the
-   number of search steps in cpi->MVSearchSteps. */
-void InitMotionCompensation ( CP_INSTANCE *cpi ){
-  int Stride = (ogg_int32_t)cpi->pb.YStride;
-  int Radius = ((MAX_MV_EXTENT/2)+1)/2;
-  int Site = 0;
-  int Row;
-  int Col;
-  int k;
-
-  /* No search stages have been generated yet. */
-  cpi->MVSearchSteps = 0;
-
-  /* Half-pixel tables.  Entry k describes the neighbour in row (k/3)-1 and
-     column (k%3)-1 of a 3x3 grid, so entry 4 is the zero offset. */
-  for ( k=0; k<9; k++ ) {
-    Row = (k/3) - 1;
-    Col = (k%3) - 1;
-    cpi->HalfPixelRef2Offset[k] = (Row * Stride) + Col;
-    cpi->HalfPixelXOffset[k] = Col;
-    cpi->HalfPixelYOffset[k] = Row;
-  }
-
-  /* One search step per radius, halving the radius each time.  Each step
-     contributes eight sites in raster order (the centre is left out). */
-  for ( ; Radius>0; Radius /= 2 ) {
-    cpi->MVSearchSteps += 1;
-
-    for ( Row=-1; Row<=1; Row++ ) {
-      for ( Col=-1; Col<=1; Col++ ) {
-        if ( Row==0 && Col==0 )
-          continue;
-
-        cpi->MVOffsetX[Site] = Col * Radius;
-        cpi->MVOffsetY[Site] = Row * Radius;
-        Site++;
-      }
-    }
-  }
-
-  /* Convert every (x,y) site offset into a single pixel index offset. */
-  for ( k=0; k<Site; k++ )
-    cpi->MVPixelOffsetY[k] = (cpi->MVOffsetY[k]*Stride) + cpi->MVOffsetX[k];
-}
-
-/* Scores one 8x8 block against a reference prediction.
-   When the two reference pointers coincide the block is scored with
-   dsp_inter8x8_err; otherwise both reference pointers are handed to
-   dsp_inter8x8_err_xy2.  The reference stride is
-   PixelsPerLine + STRIDE_EXTRA.
-   Returns the value produced by the selected DSP routine. */
-static ogg_uint32_t GetInterErr (CP_INSTANCE *cpi, unsigned char * NewDataPtr,
-                          unsigned char * RefDataPtr1,
-                          unsigned char * RefDataPtr2,
-                          ogg_uint32_t PixelsPerLine ) {
-  ogg_uint32_t  RefStride = PixelsPerLine + STRIDE_EXTRA;
-  ogg_int32_t   PtrDelta = (int)(RefDataPtr1 - RefDataPtr2);
-  ogg_int32_t   Score;
-
-  /* A non-zero distance between the reference pointers selects the
-     two-reference routine. */
-  if ( PtrDelta != 0 ) {
-    Score = dsp_inter8x8_err_xy2 (cpi->dsp, NewDataPtr, PixelsPerLine,
-              RefDataPtr1,
-              RefDataPtr2, RefStride);
-  }else{
-    Score = dsp_inter8x8_err (cpi->dsp, NewDataPtr, PixelsPerLine,
-              RefDataPtr1, RefStride);
-  }
-
-  return Score;
-}
-
-/* Adds the SAD of one 8x8 block to a running total.
-   SrcData is compared with RefDataPtr1 alone when both reference pointers
-   coincide (dsp_sad8x8); otherwise both reference pointers and BestSoFar
-   are passed to dsp_sad8x8_xy2_thres.  The reference stride is
-   PixelsPerLine + STRIDE_EXTRA.
-   Returns ErrorSoFar plus the value produced by the selected routine. */
-static ogg_uint32_t GetHalfPixelSumAbsDiffs (CP_INSTANCE *cpi,
-                                      unsigned char * SrcData,
-                                      unsigned char * RefDataPtr1,
-                                      unsigned char * RefDataPtr2,
-                                      ogg_uint32_t PixelsPerLine,
-                                      ogg_uint32_t ErrorSoFar,
-                                      ogg_uint32_t BestSoFar ) {
-  ogg_uint32_t  RefStride = PixelsPerLine + STRIDE_EXTRA;
-  ogg_int32_t   PtrDelta = (int)(RefDataPtr1 - RefDataPtr2);
-
-  /* Same pointer twice: the plain (non 0.5 pixel) case. */
-  if ( PtrDelta == 0 ) {
-    return ErrorSoFar + dsp_sad8x8 (cpi->dsp, SrcData, PixelsPerLine,
-                          RefDataPtr1, RefStride);
-  }
-
-  return ErrorSoFar + dsp_sad8x8_xy2_thres (cpi->dsp, SrcData, PixelsPerLine,
-                        RefDataPtr1,
-                        RefDataPtr2, RefStride, BestSoFar);
-}
-
-/* Sums dsp_intra8x8_err over the Y blocks of one macro block.
-   The four blocks are FragIndex, FragIndex+1 and the same pair one
-   fragment row (cpi->pb.HFragments) further on; only blocks flagged in
-   cpi->pb.display_fragments contribute.  Pixels are read from
-   cpi->ConvDestBuffer with a stride of PixelsPerLine.
-   Returns the accumulated error. */
-ogg_uint32_t GetMBIntraError (CP_INSTANCE *cpi, ogg_uint32_t FragIndex,
-                              ogg_uint32_t PixelsPerLine ) {
-  ogg_uint32_t  BlockFrag[4];
-  ogg_uint32_t  Total = 0;
-  int           b;
-
-  dsp_save_fpu (cpi->dsp);
-
-  /* Fragment indices of the four blocks, top row first. */
-  BlockFrag[0] = FragIndex;
-  BlockFrag[1] = FragIndex + 1;
-  BlockFrag[2] = FragIndex + cpi->pb.HFragments;
-  BlockFrag[3] = BlockFrag[2] + 1;
-
-  for ( b=0; b<4; b++ ) {
-    /* Blocks that are not coded do not contribute. */
-    if ( !cpi->pb.display_fragments[BlockFrag[b]] )
-      continue;
-
-    Total +=
-      dsp_intra8x8_err (cpi->dsp, &cpi->
-                    ConvDestBuffer[cpi->pb.pixel_index_table[BlockFrag[b]]],
-                    PixelsPerLine);
-  }
-
-  dsp_restore_fpu (cpi->dsp);
-
-  return Total;
-}
-
-/* Sums the inter-prediction error of the coded Y blocks of one macro block
-   for a given motion vector.
-
-   cpi           - encoder instance (fragment tables, stride, DSP routines).
-   SrcPtr        - base of the source buffer, indexed via
-                   cpi->pb.pixel_index_table.
-   RefPtr        - base of the reference buffer, indexed via
-                   cpi->pb.recon_pixel_index_table.
-   FragIndex     - fragment index of the macro block's first block; the
-                   other three are FragIndex+1, FragIndex+HFragments and
-                   FragIndex+HFragments+1.
-   LastXMV,
-   LastYMV       - motion vector in half-pixel units.
-   PixelsPerLine - source stride, forwarded to GetInterErr.
-
-   Returns the accumulated GetInterErr score of the blocks flagged in
-   cpi->pb.display_fragments. */
-ogg_uint32_t GetMBInterError (CP_INSTANCE *cpi,
-                              unsigned char * SrcPtr,
-                              unsigned char * RefPtr,
-                              ogg_uint32_t FragIndex,
-                              ogg_int32_t LastXMV,
-                              ogg_int32_t LastYMV,
-                              ogg_uint32_t PixelsPerLine ) {
-  /* Kept signed (as in InitMotionCompensation) so that offsets for negative
-     motion vectors are computed in signed arithmetic instead of relying on
-     unsigned wraparound followed by a conversion back to a signed type. */
-  ogg_int32_t   RefPixelsPerLine = (ogg_int32_t)cpi->pb.YStride;
-  ogg_uint32_t  BlockFrag[4];
-  ogg_int32_t   PixelIndex;
-  ogg_int32_t   RefPixelIndex;
-  ogg_int32_t   RefPixelOffset;
-  ogg_int32_t   RefPtr2Offset;
-  int           b;
-
-  ogg_uint32_t  InterError = 0;
-
-  unsigned char * SrcPtr1;
-  unsigned char * RefPtr1;
-
-  dsp_save_fpu (cpi->dsp);
-
-  /* Fragment indices of the four Y blocks, top row first. */
-  BlockFrag[0] = FragIndex;
-  BlockFrag[1] = FragIndex + 1;
-  BlockFrag[2] = FragIndex + cpi->pb.HFragments;
-  BlockFrag[3] = BlockFrag[2] + 1;
-
-  /* Whole-pixel part of the motion vector as an offset into the reference
-     buffer. */
-  RefPixelOffset = ((LastYMV/2) * RefPixelsPerLine) + (LastXMV/2);
-
-  /* Half-pixel part: offset from the first reference pointer to the second
-     one, one pixel and/or one line in the direction of the vector. */
-  RefPtr2Offset = 0;
-  if ( LastXMV % 2 ) {
-    if ( LastXMV > 0 )
-      RefPtr2Offset += 1;
-    else
-      RefPtr2Offset -= 1;
-  }
-  if ( LastYMV % 2 ) {
-    if ( LastYMV > 0 )
-      RefPtr2Offset += RefPixelsPerLine;
-    else
-      RefPtr2Offset -= RefPixelsPerLine;
-  }
-
-  /* Add together the errors for those blocks in the macro block that
-     are coded (Y only) */
-  for ( b=0; b<4; b++ ) {
-    if ( !cpi->pb.display_fragments[BlockFrag[b]] )
-      continue;
-
-    /* Table entries are copied into signed locals before the (possibly
-       negative) motion offset is added. */
-    PixelIndex = cpi->pb.pixel_index_table[BlockFrag[b]];
-    RefPixelIndex = cpi->pb.recon_pixel_index_table[BlockFrag[b]];
-    SrcPtr1 = &SrcPtr[PixelIndex];
-    RefPtr1 = &RefPtr[RefPixelIndex + RefPixelOffset];
-    InterError += GetInterErr(cpi, SrcPtr1, RefPtr1,
-                                 &RefPtr1[RefPtr2Offset], PixelsPerLine );
-  }
-
-  dsp_restore_fpu (cpi->dsp);
-
-  return InterError;
-}
-
-/* Stepped motion search for one macro block, followed by a half-pixel
-   refinement.
-
-   cpi           - encoder instance; supplies the source buffer
-                   (ConvDestBuffer), the search tables and the DSP routines.
-   RefFramePtr   - base of the reference frame, indexed via
-                   cpi->pb.recon_pixel_index_table.
-   FragIndex     - fragment index of the macro block's first block.
-   PixelsPerLine - source stride; the reference stride used here is
-                   PixelsPerLine + STRIDE_EXTRA.
-   MVPixelOffset - pointer offsets of the search sites, consumed in order,
-                   eight per step.
-   MV            - receives the chosen vector in half-pixel units.
-
-   Only blocks flagged in cpi->pb.display_fragments take part.
-   Returns GetMBInterError() for the chosen vector. */
-ogg_uint32_t GetMBMVInterError (CP_INSTANCE *cpi,
-                                unsigned char * RefFramePtr,
-                                ogg_uint32_t FragIndex,
-                                ogg_uint32_t PixelsPerLine,
-                                ogg_int32_t *MVPixelOffset,
-                                MOTION_VECTOR *MV ) {
-  ogg_uint32_t  Error = 0;
-  ogg_uint32_t  MinError;
-  ogg_uint32_t  InterMVError = 0;
-
-  ogg_int32_t   i;
-  ogg_int32_t   x=0, y=0;
-  ogg_int32_t   step;
-  ogg_int32_t   SearchSite=0;
-
-  unsigned char *SrcPtr[4] = {NULL,NULL,NULL,NULL};
-  unsigned char *RefPtr=NULL;
-  unsigned char *CandidateBlockPtr=NULL;
-  unsigned char *BestBlockPtr=NULL;
-
-  /* Offset from the top pair of blocks to the bottom pair in the
-     reference frame (eight lines). */
-  ogg_uint32_t  RefRow2Offset = cpi->pb.YStride * 8;
-
-  int    MBlockDispFrags[4];
-
-  /* Half pixel variables */
-  /* NOTE(review): these two are signed while MinError is unsigned. */
-  ogg_int32_t   HalfPixelError;
-  ogg_int32_t   BestHalfPixelError;
-  unsigned char   BestHalfOffset;
-  unsigned char * RefDataPtr1;
-  unsigned char * RefDataPtr2;
-
-  dsp_save_fpu (cpi->dsp);
-
-  /* Note which of the four blocks in the macro block are to be
-     included in the search. */
-  MBlockDispFrags[0] =
-    cpi->pb.display_fragments[FragIndex];
-  MBlockDispFrags[1] =
-    cpi->pb.display_fragments[FragIndex + 1];
-  MBlockDispFrags[2] =
-    cpi->pb.display_fragments[FragIndex + cpi->pb.HFragments];
-  MBlockDispFrags[3] =
-    cpi->pb.display_fragments[FragIndex + cpi->pb.HFragments + 1];
-
-  /* Set up the source pointers for the four source blocks.  */
-  SrcPtr[0] = &cpi->ConvDestBuffer[cpi->pb.pixel_index_table[FragIndex]];
-  SrcPtr[1] = SrcPtr[0] + 8;
-  SrcPtr[2] = SrcPtr[0] + (PixelsPerLine * 8);
-  SrcPtr[3] = SrcPtr[2] + 8;
-
-  /* Set starting reference point for search. */
-  RefPtr = &RefFramePtr[cpi->pb.recon_pixel_index_table[FragIndex]];
-
-  /* Check the 0,0 candidate. */
-  if ( MBlockDispFrags[0] ) {
-    Error += dsp_sad8x8 (cpi->dsp, SrcPtr[0], PixelsPerLine, RefPtr,
-                         PixelsPerLine + STRIDE_EXTRA);
-  }
-  if ( MBlockDispFrags[1] ) {
-    Error += dsp_sad8x8 (cpi->dsp, SrcPtr[1], PixelsPerLine, RefPtr + 8,
-                         PixelsPerLine + STRIDE_EXTRA);
-  }
-  if ( MBlockDispFrags[2] ) {
-    Error += dsp_sad8x8 (cpi->dsp, SrcPtr[2], PixelsPerLine, RefPtr + RefRow2Offset,
-                         PixelsPerLine + STRIDE_EXTRA);
-  }
-  if ( MBlockDispFrags[3] ) {
-    Error += dsp_sad8x8 (cpi->dsp, SrcPtr[3], PixelsPerLine, RefPtr + RefRow2Offset + 8,
-                         PixelsPerLine + STRIDE_EXTRA);
-  }
-
-  /* Set starting values to results of 0, 0 vector. */
-  MinError = Error;
-  BestBlockPtr = RefPtr;
-  x = 0;
-  y = 0;
-  MV->x = 0;
-  MV->y = 0;
-
-  /* Proceed through N-steps. */
-  for (  step=0; step<cpi->MVSearchSteps; step++ ) {
-    /* Search the 8-neighbours at distance pertinent to current step.*/
-    for ( i=0; i<8; i++ ) {
-      /* Set pointer to next candidate matching block. */
-      CandidateBlockPtr = RefPtr + MVPixelOffset[SearchSite];
-
-      /* Reset error */
-      Error = 0;
-
-      /* Get the score for the current offset */
-      if ( MBlockDispFrags[0] ) {
-        Error += dsp_sad8x8 (cpi->dsp, SrcPtr[0], PixelsPerLine, CandidateBlockPtr,
-                             PixelsPerLine + STRIDE_EXTRA);
-      }
-
-      /* The remaining blocks are only scored while the running total is
-         still below the best so far; MinError is also passed to the
-         thresholded SAD (presumably so it can stop early - confirm). */
-      if ( MBlockDispFrags[1] && (Error < MinError) ) {
-        Error += dsp_sad8x8_thres (cpi->dsp, SrcPtr[1], PixelsPerLine, CandidateBlockPtr + 8,
-                             PixelsPerLine + STRIDE_EXTRA, MinError);
-      }
-
-      if ( MBlockDispFrags[2] && (Error < MinError) ) {
-        Error += dsp_sad8x8_thres (cpi->dsp, SrcPtr[2], PixelsPerLine, CandidateBlockPtr + RefRow2Offset,
-                             PixelsPerLine + STRIDE_EXTRA, MinError);
-      }
-
-      if ( MBlockDispFrags[3] && (Error < MinError) ) {
-        Error += dsp_sad8x8_thres (cpi->dsp, SrcPtr[3], PixelsPerLine, CandidateBlockPtr + RefRow2Offset + 8,
-                             PixelsPerLine + STRIDE_EXTRA, MinError);
-      }
-
-      if ( Error < MinError ) {
-        /* Remember best match. */
-        MinError = Error;
-        BestBlockPtr = CandidateBlockPtr;
-
-                                /* Where is it. */
-        /* MV still holds the position at the start of this step, so the
-           site offset is applied relative to it. */
-        x = MV->x + cpi->MVOffsetX[SearchSite];
-        y = MV->y + cpi->MVOffsetY[SearchSite];
-      }
-
-      /* Move to next search location. */
-      /* SearchSite is not reset between steps: it walks the tables
-         straight through, eight entries per step. */
-      SearchSite += 1;
-    }
-
-    /* Move to best location this step. */
-    RefPtr = BestBlockPtr;
-    MV->x = x;
-    MV->y = y;
-  }
-
-  /* Factor vectors to 1/2 pixel resolution. */
-  MV->x = (MV->x * 2);
-  MV->y = (MV->y * 2);
-
-  /* Now do the half pixel pass */
-  BestHalfOffset = 4;     /* Default to the no offset case. */
-  BestHalfPixelError = MinError;
-
-  /* Get the half pixel error for each half pixel offset */
-  for ( i=0; i < 9; i++ ) {
-    HalfPixelError = 0;
-
-    /* Each block is scored against the best whole-pixel position and its
-       neighbour at cpi->HalfPixelRef2Offset[i]; later blocks are skipped
-       once the running total is no longer below the best so far. */
-    if ( MBlockDispFrags[0] ) {
-      RefDataPtr1 = BestBlockPtr;
-      RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
-      HalfPixelError =
-        GetHalfPixelSumAbsDiffs(cpi, SrcPtr[0], RefDataPtr1, RefDataPtr2,
-                         PixelsPerLine, HalfPixelError, BestHalfPixelError );
-    }
-
-    if ( MBlockDispFrags[1]  && (HalfPixelError < BestHalfPixelError) ) {
-      RefDataPtr1 = BestBlockPtr + 8;
-      RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
-      HalfPixelError =
-        GetHalfPixelSumAbsDiffs(cpi, SrcPtr[1], RefDataPtr1, RefDataPtr2,
-                         PixelsPerLine, HalfPixelError, BestHalfPixelError );
-    }
-
-    if ( MBlockDispFrags[2] && (HalfPixelError < BestHalfPixelError) ) {
-      RefDataPtr1 = BestBlockPtr + RefRow2Offset;
-      RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
-      HalfPixelError =
-        GetHalfPixelSumAbsDiffs(cpi, SrcPtr[2], RefDataPtr1, RefDataPtr2,
-                         PixelsPerLine, HalfPixelError, BestHalfPixelError );
-    }
-
-    if ( MBlockDispFrags[3] && (HalfPixelError < BestHalfPixelError) ) {
-      RefDataPtr1 = BestBlockPtr + RefRow2Offset + 8;
-      RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
-      HalfPixelError =
-        GetHalfPixelSumAbsDiffs(cpi, SrcPtr[3], RefDataPtr1, RefDataPtr2,
-                         PixelsPerLine, HalfPixelError, BestHalfPixelError );
-    }
-
-    if ( HalfPixelError < BestHalfPixelError ) {
-      BestHalfOffset = (unsigned char)i;
-      BestHalfPixelError = HalfPixelError;
-    }
-  }
-
-  /* Half pixel adjust the MV */
-  MV->x += cpi->HalfPixelXOffset[BestHalfOffset];
-  MV->y += cpi->HalfPixelYOffset[BestHalfOffset];
-
-  /* Get the error score for the chosen 1/2 pixel offset; this, not the
-     SAD used during the search, is what gets returned. */
-  InterMVError = GetMBInterError( cpi, cpi->ConvDestBuffer, RefFramePtr,
-                                  FragIndex, MV->x, MV->y, PixelsPerLine );
-
-  dsp_restore_fpu (cpi->dsp);
-
-  /* Return score of best matching block. */
-  return InterMVError;
-}
-
-/* Exhaustive motion search for one macro block, followed by a half-pixel
-   refinement.
-
-   cpi           - encoder instance; supplies the source buffer
-                   (ConvDestBuffer), the half-pixel tables and the DSP
-                   routines.
-   RefFramePtr   - base of the reference frame, indexed via
-                   cpi->pb.recon_pixel_index_table.
-   FragIndex     - fragment index of the macro block's first block.
-   PixelsPerLine - source stride; the reference stride used here is
-                   PixelsPerLine + STRIDE_EXTRA.
-   MV            - receives the chosen vector in half-pixel units.
-
-   Every whole-pixel position of a MAX_MV_EXTENT x MAX_MV_EXTENT window is
-   scored with the SAD of the blocks flagged in cpi->pb.display_fragments.
-   Returns GetMBInterError() for the chosen vector. */
-ogg_uint32_t GetMBMVExhaustiveSearch (CP_INSTANCE *cpi,
-                                      unsigned char * RefFramePtr,
-                                      ogg_uint32_t FragIndex,
-                                      ogg_uint32_t PixelsPerLine,
-                                      MOTION_VECTOR *MV ) {
-  ogg_uint32_t  Error = 0;
-  ogg_uint32_t  MinError = HUGE_ERROR;
-  ogg_uint32_t  InterMVError = 0;
-
-  ogg_int32_t   i, j;
-  ogg_int32_t   x=0, y=0;
-
-  unsigned char *SrcPtr[4] = {NULL,NULL,NULL,NULL};
-  unsigned char *RefPtr;
-  unsigned char *CandidateBlockPtr=NULL;
-  /* NOTE(review): stays NULL if no candidate scores below HUGE_ERROR, and
-     is used unconditionally in the half-pixel pass - confirm that cannot
-     happen. */
-  unsigned char *BestBlockPtr=NULL;
-
-  /* Offset from the top pair of blocks to the bottom pair in the
-     reference frame (eight lines). */
-  ogg_uint32_t  RefRow2Offset = cpi->pb.YStride * 8;
-
-  int    MBlockDispFrags[4];
-
-  /* Half pixel variables */
-  ogg_int32_t   HalfPixelError;
-  ogg_int32_t   BestHalfPixelError;
-  unsigned char   BestHalfOffset;
-  unsigned char * RefDataPtr1;
-  unsigned char * RefDataPtr2;
-
-  dsp_save_fpu (cpi->dsp);
-
-  /* Note which of the four blocks in the macro block are to be
-     included in the search. */
-  MBlockDispFrags[0] = cpi->
-    pb.display_fragments[FragIndex];
-  MBlockDispFrags[1] = cpi->
-    pb.display_fragments[FragIndex + 1];
-  MBlockDispFrags[2] = cpi->
-    pb.display_fragments[FragIndex + cpi->pb.HFragments];
-  MBlockDispFrags[3] = cpi->
-    pb.display_fragments[FragIndex + cpi->pb.HFragments + 1];
-
-  /* Set up the source pointers for the four source blocks. */
-  SrcPtr[0] = &cpi->
-    ConvDestBuffer[cpi->pb.pixel_index_table[FragIndex]];
-  SrcPtr[1] = SrcPtr[0] + 8;
-  SrcPtr[2] = SrcPtr[0] + (PixelsPerLine * 8);
-  SrcPtr[3] = SrcPtr[2] + 8;
-
-  /* Start at the top-left corner of the search window: MAX_MV_EXTENT/2
-     lines up and MAX_MV_EXTENT/2 pixels left of the block's position. */
-  RefPtr = &RefFramePtr[cpi->pb.recon_pixel_index_table[FragIndex]];
-  RefPtr = RefPtr - ((MAX_MV_EXTENT/2) * cpi->
-                     pb.YStride) - (MAX_MV_EXTENT/2);
-
-  /* Search each pixel aligned site */
-  for ( i = 0; i < (ogg_int32_t)MAX_MV_EXTENT; i ++ ) {
-    /* Starting position in row */
-    CandidateBlockPtr = RefPtr;
-
-    for ( j = 0; j < (ogg_int32_t)MAX_MV_EXTENT; j++ ) {
-      /* Reset error */
-      Error = 0;
-
-      /* Sum errors for each block. */
-      if ( MBlockDispFrags[0] ) {
-        Error += dsp_sad8x8 (cpi->dsp, SrcPtr[0], PixelsPerLine, CandidateBlockPtr,
-                             PixelsPerLine + STRIDE_EXTRA);
-      }
-      if ( MBlockDispFrags[1] ){
-        Error += dsp_sad8x8 (cpi->dsp, SrcPtr[1], PixelsPerLine, CandidateBlockPtr + 8,
-                             PixelsPerLine + STRIDE_EXTRA);
-      }
-      if ( MBlockDispFrags[2] ){
-        Error += dsp_sad8x8 (cpi->dsp, SrcPtr[2], PixelsPerLine, CandidateBlockPtr + RefRow2Offset,
-                             PixelsPerLine + STRIDE_EXTRA);
-      }
-      if ( MBlockDispFrags[3] ){
-        Error += dsp_sad8x8 (cpi->dsp, SrcPtr[3], PixelsPerLine, CandidateBlockPtr + RefRow2Offset + 8,
-                             PixelsPerLine + STRIDE_EXTRA);
-      }
-
-      /* Was this the best so far */
-      if ( Error < MinError ) {
-        MinError = Error;
-        BestBlockPtr = CandidateBlockPtr;
-        /* NOTE(review): the literal 16 makes this agree with the
-           MAX_MV_EXTENT/2 window origin above only when MAX_MV_EXTENT is
-           31 or 32 - confirm against its definition. */
-        x = 16 + j - MAX_MV_EXTENT;
-        y = 16 + i - MAX_MV_EXTENT;
-      }
-
-      /* Move to the next site */
-      CandidateBlockPtr ++;
-    }
-
-    /* Move on to the next row. */
-    RefPtr += cpi->pb.YStride;
-
-  }
-
-  /* Factor vectors to 1/2 pixel resolution. */
-  MV->x = (x * 2);
-  MV->y = (y * 2);
-
-  /* Now do the half pixel pass */
-  BestHalfOffset = 4;     /* Default to the no offset case. */
-  BestHalfPixelError = MinError;
-
-  /* Get the half pixel error for each half pixel offset */
-  for ( i=0; i < 9; i++ ) {
-    HalfPixelError = 0;
-
-    /* Each block is scored against the best whole-pixel position and its
-       neighbour at cpi->HalfPixelRef2Offset[i]; later blocks are skipped
-       once the running total is no longer below the best so far. */
-    if ( MBlockDispFrags[0] ) {
-      RefDataPtr1 = BestBlockPtr;
-      RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
-      HalfPixelError =
-        GetHalfPixelSumAbsDiffs(cpi, SrcPtr[0], RefDataPtr1, RefDataPtr2,
-                         PixelsPerLine, HalfPixelError, BestHalfPixelError );
-    }
-
-    if ( MBlockDispFrags[1]  && (HalfPixelError < BestHalfPixelError) ) {
-      RefDataPtr1 = BestBlockPtr + 8;
-      RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
-      HalfPixelError =
-        GetHalfPixelSumAbsDiffs(cpi, SrcPtr[1], RefDataPtr1, RefDataPtr2,
-                         PixelsPerLine, HalfPixelError, BestHalfPixelError );
-    }
-
-    if ( MBlockDispFrags[2] && (HalfPixelError < BestHalfPixelError) ) {
-      RefDataPtr1 = BestBlockPtr + RefRow2Offset;
-      RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
-      HalfPixelError =
-        GetHalfPixelSumAbsDiffs(cpi, SrcPtr[2], RefDataPtr1, RefDataPtr2,
-                         PixelsPerLine, HalfPixelError, BestHalfPixelError );
-    }
-
-    if ( MBlockDispFrags[3] && (HalfPixelError < BestHalfPixelError) ) {
-      RefDataPtr1 = BestBlockPtr + RefRow2Offset + 8;
-      RefDataPtr2 = RefDataPtr1 + cpi->HalfPixelRef2Offset[i];
-      HalfPixelError =
-        GetHalfPixelSumAbsDiffs(cpi, SrcPtr[3], RefDataPtr1, RefDataPtr2,
-                         PixelsPerLine, HalfPixelError, BestHalfPixelError );
-    }
-
-    if ( HalfPixelError < BestHalfPixelError ){
-      BestHalfOffset = (unsigned char)i;
-      BestHalfPixelError = HalfPixelError;
-    }
-  }
-
-  /* Half pixel adjust the MV */
-  MV->x += cpi->HalfPixelXOffset[BestHalfOffset];
-  MV->y += cpi->HalfPixelYOffset[BestHalfOffset];
-
-  /* Get the error score for the chosen 1/2 pixel offset; this, not the
-     SAD used during the search, is what gets returned. */
-  InterMVError = GetMBInterError( cpi, cpi->ConvDestBuffer, RefFramePtr,
-                                  FragIndex, MV->x, MV->y, PixelsPerLine );
-
-  dsp_restore_fpu (cpi->dsp);
-
-  /* Return score of best matching block. */
-  return InterMVError;
-}
-/*
- * Exhaustive motion search for a single 8x8 block.
- *
- * Scans a MAX_MV_EXTENT x MAX_MV_EXTENT window of whole-pixel positions in
- * the reference frame, keeps the candidate with the lowest dsp_sad8x8 score,
- * then tries the nine half-pixel offsets around that candidate.
- *
- * cpi           - encoder instance (source of ConvDestBuffer, index tables,
- *                 stride and the half-pixel offset tables).
- * RefFramePtr   - base of the reference frame to search.
- * FragIndex     - index of the block (fragment) being matched.
- * PixelsPerLine - stride passed for the source block; the reference stride
- *                 passed to dsp_sad8x8 is PixelsPerLine + STRIDE_EXTRA.
- * MV            - receives the chosen vector in half-pixel units.
- *
- * Returns the GetInterErr score of the block at the chosen offset.
- *
- * NOTE(review): BestBlockPtr stays NULL if no candidate scores below
- * HUGE_ERROR; presumably that cannot happen for an 8x8 SAD - confirm.
- */
-static ogg_uint32_t GetBMVExhaustiveSearch (CP_INSTANCE *cpi,
-                                            unsigned char * RefFramePtr,
-                                            ogg_uint32_t FragIndex,
-                                            ogg_uint32_t PixelsPerLine,
-                                            MOTION_VECTOR *MV ) {
-  ogg_uint32_t  Error = 0;
-  ogg_uint32_t  MinError = HUGE_ERROR;
-  ogg_uint32_t  InterMVError = 0;
-
-  ogg_int32_t   i, j;
-  ogg_int32_t   x=0, y=0;
-
-  unsigned char *SrcPtr = NULL;
-  unsigned char *RefPtr;
-  unsigned char *CandidateBlockPtr=NULL;
-  unsigned char *BestBlockPtr=NULL;
-
-  /* Half pixel variables */
-  ogg_int32_t   HalfPixelError;
-  ogg_int32_t   BestHalfPixelError;
-  unsigned char   BestHalfOffset;
-  unsigned char * RefDataPtr2;
-
-  /* Set up the source pointer for the block. */
-  SrcPtr = &cpi->
-    ConvDestBuffer[cpi->pb.pixel_index_table[FragIndex]];
-
-  /* Start at the co-located reference block, then back up MAX_MV_EXTENT/2
-     rows and columns to reach the top-left corner of the search window. */
-  RefPtr = &RefFramePtr[cpi->pb.recon_pixel_index_table[FragIndex]];
-  RefPtr = RefPtr - ((MAX_MV_EXTENT/2) *
-                     cpi->pb.YStride) - (MAX_MV_EXTENT/2);
-
-  /* Search each pixel-aligned site (i walks rows, j walks columns). */
-  for ( i = 0; i < (ogg_int32_t)MAX_MV_EXTENT; i ++ ) {
-    /* Starting position in row */
-    CandidateBlockPtr = RefPtr;
-
-    for ( j = 0; j < (ogg_int32_t)MAX_MV_EXTENT; j++ ){
-      /* Get the block error score. */
-      Error = dsp_sad8x8 (cpi->dsp, SrcPtr, PixelsPerLine, CandidateBlockPtr,
-                             PixelsPerLine + STRIDE_EXTRA);
-
-      /* Was this the best so far? Strict '<' keeps the first of equal
-         candidates in scan order. */
-      if ( Error < MinError ) {
-        MinError = Error;
-        BestBlockPtr = CandidateBlockPtr;
-        x = 16 + j - MAX_MV_EXTENT;
-        y = 16 + i - MAX_MV_EXTENT;
-      }
-
-      /* Move to the next site */
-      CandidateBlockPtr ++;
-    }
-
-    /* Move on to the next row. */
-    RefPtr += cpi->pb.YStride;
-  }
-
-  /* Factor vectors to 1/2 pixel resolution. */
-  MV->x = (x * 2);
-  MV->y = (y * 2);
-
-  /* Now do the half pixel pass */
-  BestHalfOffset = 4;     /* Default to the no offset case. */
-  BestHalfPixelError = MinError;
-
-  /* Get the half pixel error for each half pixel offset */
-  for ( i=0; i < 9; i++ ) {
-    RefDataPtr2 = BestBlockPtr + cpi->HalfPixelRef2Offset[i];
-    HalfPixelError =
-      GetHalfPixelSumAbsDiffs(cpi, SrcPtr, BestBlockPtr, RefDataPtr2,
-                            PixelsPerLine, 0, BestHalfPixelError );
-
-    if ( HalfPixelError < BestHalfPixelError ){
-      BestHalfOffset = (unsigned char)i;
-      BestHalfPixelError = HalfPixelError;
-    }
-  }
-
-  /* Half pixel adjust the MV */
-  MV->x += cpi->HalfPixelXOffset[BestHalfOffset];
-  MV->y += cpi->HalfPixelYOffset[BestHalfOffset];
-
-  /* Get the variance score at the chosen offset */
-  RefDataPtr2 = BestBlockPtr + cpi->HalfPixelRef2Offset[BestHalfOffset];
-
-  InterMVError =
-    GetInterErr(cpi, SrcPtr, BestBlockPtr, RefDataPtr2, PixelsPerLine );
-
-  /* Return score of best matching block. */
-  return InterMVError;
-}
-
-ogg_uint32_t GetFOURMVExhaustiveSearch (CP_INSTANCE *cpi,
-                                        unsigned char * RefFramePtr,
-                                        ogg_uint32_t FragIndex,
-                                        ogg_uint32_t PixelsPerLine,
-                                        MOTION_VECTOR *MV ) {
-  /* Four-vector exhaustive search for one macro block.
-     FragIndex is the top-left luma block; MV must have room for four
-     vectors, filled in the order top-left, top-right, bottom-left,
-     bottom-right.  Returns the sum of the four per-block scores, or
-     HUGE_ERROR when any of the four luma blocks is not flagged in
-     display_fragments (4MV is only considered when all four are coded). */
-  ogg_uint32_t  Total = HUGE_ERROR;
-  ogg_uint32_t  Quad[4];
-  int           b;
-
-  dsp_save_fpu (cpi->dsp);
-
-  /* Fragment indices of the 2x2 luma blocks making up the macro block. */
-  Quad[0] = FragIndex;
-  Quad[1] = FragIndex + 1;
-  Quad[2] = FragIndex + cpi->pb.HFragments;
-  Quad[3] = FragIndex + cpi->pb.HFragments + 1;
-
-  /* Stop at the first block that is not being updated. */
-  for ( b = 0; b < 4; b++ ) {
-    if ( !cpi->pb.display_fragments[Quad[b]] )
-      break;
-  }
-
-  if ( b == 4 ) {
-    /* All four are coded: accumulate the error of each block's search. */
-    Total = 0;
-    for ( b = 0; b < 4; b++ ) {
-      Total += GetBMVExhaustiveSearch(cpi, RefFramePtr, Quad[b],
-                                      PixelsPerLine, &(MV[b]) );
-    }
-  }
-
-  dsp_restore_fpu (cpi->dsp);
-
-  /* Score of the best match (or HUGE_ERROR if 4MV was not applicable). */
-  return Total;
-}
-

+ 0 - 339
Engine/lib/libtheora/lib/enc/misc_common.c

@@ -1,339 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: misc_common.c 15323 2008-09-19 19:43:59Z giles $
-
- ********************************************************************/
-
-#include <string.h>
-#include "codec_internal.h"
-#include "block_inline.h"
-
-#define FIXED_Q                 150   /* NOTE(review): not referenced in the visible part of this file */
-#define MAX_UP_REG_LOOPS        2     /* cap on scan passes in UpRegulateBlocks */
-
-/* Initial bytes-per-block estimate for each Q table index.  Used by
-   GetEstimatedBpb for non-key frames, where the value is further scaled
-   by cpi->BpbCorrectionFactor. */
-static const double BpbTable[Q_TABLE_SIZE] = {
-  0.42,  0.45,  0.46,  0.49,  0.51,  0.53,  0.56,  0.58,
-  0.61,  0.64,  0.68,  0.71,  0.74,  0.77,  0.80,  0.84,
-  0.89,  0.92,  0.98,  1.01,  1.04,  1.13,  1.17,  1.23,
-  1.28,  1.34,  1.41,  1.45,  1.51,  1.59,  1.69,  1.80,
-  1.84,  1.94,  2.02,  2.15,  2.23,  2.34,  2.44,  2.50,
-  2.69,  2.80,  2.87,  3.04,  3.16,  3.29,  3.59,  3.66,
-  3.86,  3.94,  4.22,  4.50,  4.64,  4.70,  5.24,  5.34,
-  5.61,  5.87,  6.11,  6.41,  6.71,  6.99,  7.36,  7.69
-};
-  /* Key-frame counterpart of BpbTable: GetEstimatedBpb returns these
-     values unscaled when cpi->pb.FrameType == KEY_FRAME. */
-static const double KfBpbTable[Q_TABLE_SIZE] = {
-  0.74,  0.81,  0.88,  0.94,  1.00,  1.06,  1.14,  1.19,
-  1.27,  1.34,  1.42,  1.49,  1.54,  1.59,  1.66,  1.73,
-  1.80,  1.87,  1.97,  2.01,  2.08,  2.21,  2.25,  2.36,
-  2.39,  2.50,  2.55,  2.65,  2.71,  2.82,  2.95,  3.01,
-  3.11,  3.19,  3.31,  3.42,  3.58,  3.66,  3.78,  3.89,
-  4.11,  4.26,  4.36,  4.39,  4.63,  4.76,  4.85,  5.04,
-  5.26,  5.29,  5.47,  5.64,  5.76,  6.05,  6.35,  6.67,
-  6.91,  7.17,  7.40,  7.56,  8.02,  8.45,  8.86,  9.38
-};
-
-double GetEstimatedBpb( CP_INSTANCE *cpi, ogg_uint32_t TargetQ ){
-  /* Estimate the bytes needed per coded block at quality TargetQ.
-     The estimate comes from KfBpbTable for key frames and from BpbTable
-     (scaled by cpi->BpbCorrectionFactor) for every other frame type. */
-  ogg_int32_t  Slot = Q_TABLE_SIZE - 1;   /* fallback: last table entry */
-  ogg_uint32_t Probe;
-  double       Estimate;
-
-  /* The first threshold that TargetQ reaches selects the table slot. */
-  for ( Probe = 0; Probe < Q_TABLE_SIZE; Probe++ ) {
-    if ( TargetQ >= cpi->pb.QThreshTable[Probe] ) {
-      Slot = Probe;
-      break;
-    }
-  }
-
-  if ( cpi->pb.FrameType == KEY_FRAME ) {
-    /* Key frames use their own table and no correction factor. */
-    Estimate = KfBpbTable[Slot];
-    return Estimate;
-  }
-
-  /* Other frames: primary prediction adjusted by the running correction. */
-  Estimate = BpbTable[Slot];
-  Estimate = Estimate * cpi->BpbCorrectionFactor;
-  return Estimate;
-}
-
-/* Flag one fragment for an extra update at RegulationQ.  Nothing happens if
-   the fragment is already marked in display_fragments, or if NoCheck is
-   zero and the fragment was not last coded at a Q value above RegulationQ. */
-static void UpRegulateFragment( CP_INSTANCE *cpi, ogg_uint32_t RegulationQ,
-                                ogg_int32_t FragIndex, int NoCheck ) {
-  if ( cpi->pb.display_fragments[FragIndex] )
-    return;
-  if ( !(NoCheck) && !(cpi->FragmentLastQ[FragIndex] > RegulationQ) )
-    return;
-
-  cpi->pb.display_fragments[FragIndex] = 1;
-  cpi->extra_fragments[FragIndex] = 1;
-  cpi->FragmentLastQ[FragIndex] = RegulationQ;
-  cpi->MotionScore++;
-}
-
-/* Up-regulate every block of macro block MB in super block SB: the four
-   luma blocks first, then the matching U block and V block.  Each newly
-   flagged fragment increments cpi->MotionScore.  NoCheck != 0 skips the
-   "last coded at a higher Q" test. */
-static void UpRegulateMB( CP_INSTANCE *cpi, ogg_uint32_t RegulationQ,
-                   ogg_uint32_t SB, ogg_uint32_t MB, int NoCheck ) {
-  ogg_int32_t  Frag;
-  ogg_uint32_t Blk;
-
-  /* Row, column and offset of the corresponding block in a chroma plane. */
-  ogg_uint32_t ChromaRow;
-  ogg_uint32_t ChromaCol;
-  ogg_uint32_t ChromaOffset;
-
-  /* Macro blocks lying outside the frame have a negative top-left
-     fragment index and must be ignored. */
-  if ( QuadMapToMBTopLeft(cpi->pb.BlockMap, SB, MB ) < 0 )
-    return;
-
-  /* The four Y blocks. */
-  for ( Blk=0; Blk<4; Blk++ ){
-    Frag = QuadMapToIndex1( cpi->pb.BlockMap, SB, MB, Blk );
-    UpRegulateFragment( cpi, RegulationQ, Frag, NoCheck );
-  }
-
-  /* Locate the chroma block that sits under this macro block. */
-  Frag = QuadMapToMBTopLeft(cpi->pb.BlockMap, SB, MB );
-
-  ChromaRow = (Frag / (cpi->pb.HFragments * 2));
-  ChromaCol = (Frag % cpi->pb.HFragments) / 2;
-  ChromaOffset = (ChromaRow * (cpi->pb.HFragments / 2)) + ChromaCol;
-
-  /* U plane fragment ... */
-  Frag = cpi->pb.YPlaneFragments + ChromaOffset;
-  UpRegulateFragment( cpi, RegulationQ, Frag, NoCheck );
-
-  /* ... and the V plane fragment one chroma plane further on. */
-  Frag += cpi->pb.UVPlaneFragments;
-  UpRegulateFragment( cpi, RegulationQ, Frag, NoCheck );
-}
-/*
- * Walk the super blocks, starting at (*LastSB, *LastMB), and up-regulate
- * macro blocks (UpRegulateMB with NoCheck == 0) until cpi->MotionScore
- * reaches RecoveryBlocks or MAX_UP_REG_LOOPS passes have been made.
- *
- * cpi            - encoder instance.
- * RegulationQ    - Q value recorded for each fragment that gets flagged.
- * RecoveryBlocks - MotionScore target at which the scan stops.
- * LastSB, LastMB - in/out scan position, updated so that a later call
- *                  resumes from where this one stopped; *LastSB wraps to 0
- *                  once the scan runs past the last super block.
- */
-static void UpRegulateBlocks (CP_INSTANCE *cpi, ogg_uint32_t RegulationQ,
-                       ogg_int32_t RecoveryBlocks,
-                       ogg_uint32_t * LastSB, ogg_uint32_t * LastMB ) {
-
-  ogg_uint32_t LoopTimesRound = 0;
-  ogg_uint32_t MaxSB = cpi->pb.YSBRows *
-    cpi->pb.YSBCols;   /* Tot super blocks in image */
-  ogg_uint32_t SB, MB; /* Super-Block and macro block indices. */
-
-  /* First scan for blocks for which a residue update is outstanding. */
-  while ( (cpi->MotionScore < RecoveryBlocks) &&
-          (LoopTimesRound < MAX_UP_REG_LOOPS) ) {
-    LoopTimesRound++;
-
-    for ( SB = (*LastSB); SB < MaxSB; SB++ ) {
-      /* Check its four Macro-Blocks */
-      for ( MB=(*LastMB); MB<4; MB++ ) {
-        /* Mark relevant blocks for update */
-        UpRegulateMB( cpi, RegulationQ, SB, MB, 0 );
-
-        /* Keep track of the last refresh MB (advances with MB and wraps
-           to 0 after the fourth macro block). */
-        (*LastMB) += 1;
-        if ( (*LastMB) == 4 )
-          (*LastMB) = 0;
-
-        /* Termination clause */
-        if (cpi->MotionScore >= RecoveryBlocks) {
-          /* Make sure we don't stall at SB level */
-          if ( *LastMB == 0 )
-            SB++;
-          break;
-        }
-      }
-
-      /* Termination clause */
-      if (cpi->MotionScore >= RecoveryBlocks)
-        break;
-    }
-
-    /* Update super block start index  */
-    if ( SB >= MaxSB){
-      (*LastSB) = 0;
-    }else{
-      (*LastSB) = SB;
-    }
-  }
-}
-
-void UpRegulateDataStream (CP_INSTANCE *cpi, ogg_uint32_t RegulationQ,
-                           ogg_int32_t RecoveryBlocks ) {
-  /* Flag extra blocks for update until cpi->MotionScore reaches the
-     recovery target.  Pass one (UpRegulateBlocks) picks blocks last coded
-     at a higher Q.  If that is not enough and RegulationQ is VERY_BEST_Q,
-     a final unconditional pass resumes at cpi->FinalPassLastPos. */
-  const ogg_uint32_t SBCount = cpi->pb.YSBRows *
-    cpi->pb.YSBCols;              /* total super blocks in the image */
-  ogg_uint32_t StdMBPos = 0;      /* MB cursor for the standard pass */
-  ogg_uint32_t FinalMBPos = 0;    /* MB cursor for the final pass */
-  ogg_uint32_t SBIdx;
-  ogg_uint32_t MBIdx;
-
-  /* Deduct 3 blocks from the target: once a macro block is examined all
-     of its blocks are tested, so the scan tends to overshoot. */
-  if ( RecoveryBlocks > 3 )
-    RecoveryBlocks -= 3;
-
-  /* Pass one: blocks last coded at a higher Q. */
-  UpRegulateBlocks( cpi, RegulationQ, RecoveryBlocks,
-                    &cpi->LastEndSB, &StdMBPos );
-
-  /* The final refresh pass only applies at the minimum Q and only while
-     the target has still not been met. */
-  if ( (RegulationQ != VERY_BEST_Q) ||
-       (cpi->MotionScore >= RecoveryBlocks) )
-    return;
-
-  for ( SBIdx = cpi->FinalPassLastPos; SBIdx < SBCount; SBIdx++ ) {
-    /* Visit the super block's four macro blocks. */
-    for ( MBIdx = FinalMBPos; MBIdx < 4; MBIdx++ ) {
-      /* Mark the macro block's blocks without the last-Q check. */
-      UpRegulateMB( cpi, RegulationQ, SBIdx, MBIdx, 1 );
-
-      /* Advance the MB cursor; a wrap means this super block is done. */
-      FinalMBPos += 1;
-      if ( FinalMBPos == 4 ) {
-        FinalMBPos = 0;
-        cpi->FinalPassLastPos += 1;
-      }
-
-      /* Stop as soon as enough blocks have been flagged. */
-      if ( cpi->MotionScore >= RecoveryBlocks )
-        return;
-    }
-  }
-}
-
-/*
- * Choose the quantiser for the current frame and re-initialise the
- * quantiser tables if it changed.
- *
- * cpi         - encoder instance; reads ThisFrameTargetBytes, FixedQ,
- *               Configuration.ActiveMaxQ and pb.QThreshTable, writes
- *               pb.ThisFrameQualityValue / pb.LastFrameQualityValue.
- * UpdateScore - number of units the frame's target bytes are spread over;
- *               when <= 0 the last table entry is used as the starting
- *               point.
- */
-void RegulateQ( CP_INSTANCE *cpi, ogg_int32_t UpdateScore ) {
-  ogg_uint32_t QIndex = Q_TABLE_SIZE - 1;
-
-  if ( UpdateScore > 0 ) {
-    double TargetUnitScoreBytes = (double)cpi->ThisFrameTargetBytes /
-      (double)UpdateScore;
-    double LastBitError = 10000.0;       /* Silly high number */
-    ogg_uint32_t i;
-
-    /* Search for the best Q for the target bitrate: stop at the first
-       entry whose predicted cost exceeds the target, then keep whichever
-       of that entry and the previous one is closer to the target. */
-    for ( i = 0; i < Q_TABLE_SIZE; i++ ) {
-      double PredUnitScoreBytes =
-        GetEstimatedBpb( cpi, cpi->pb.QThreshTable[i] );
-
-      if ( PredUnitScoreBytes > TargetUnitScoreBytes ) {
-        /* There is no previous entry to fall back to when i == 0; without
-           this guard "i - 1" would wrap the unsigned index and read far
-           outside QThreshTable. */
-        if ( (i == 0) ||
-             ((PredUnitScoreBytes - TargetUnitScoreBytes) <= LastBitError) ) {
-          QIndex = i;
-        } else {
-          QIndex = i - 1;
-        }
-        break;
-      }
-
-      LastBitError = TargetUnitScoreBytes - PredUnitScoreBytes;
-    }
-  }
-
-  /* QIndex should now indicate the optimal Q. */
-  cpi->pb.ThisFrameQualityValue = cpi->pb.QThreshTable[QIndex];
-
-  /* Apply range restrictions for key frames: keep the value between
-     the thresholds at table entries 50 and 20. */
-  if ( cpi->pb.FrameType == KEY_FRAME ) {
-    if ( cpi->pb.ThisFrameQualityValue > cpi->pb.QThreshTable[20] )
-      cpi->pb.ThisFrameQualityValue = cpi->pb.QThreshTable[20];
-    else if ( cpi->pb.ThisFrameQualityValue < cpi->pb.QThreshTable[50] )
-      cpi->pb.ThisFrameQualityValue = cpi->pb.QThreshTable[50];
-  }
-
-  /* Limit the Q value to the maximum available value */
-  if (cpi->pb.ThisFrameQualityValue >
-      cpi->pb.QThreshTable[cpi->Configuration.ActiveMaxQ]) {
-    cpi->pb.ThisFrameQualityValue =
-      (ogg_uint32_t)cpi->pb.QThreshTable[cpi->Configuration.ActiveMaxQ];
-  }
-
-  /* A non-zero FixedQ overrides everything above for every frame type.
-     (The old key-frame branch first stored QThreshTable[43] and then
-     immediately overwrote it with FixedQ; that dead store is dropped.) */
-  if(cpi->FixedQ) {
-    cpi->pb.ThisFrameQualityValue = cpi->FixedQ;
-  }
-
-  /* If the quantizer value has changed then re-initialise it */
-  if ( cpi->pb.ThisFrameQualityValue != cpi->pb.LastFrameQualityValue ) {
-    /* Initialise quality tables. */
-    UpdateQC( cpi, cpi->pb.ThisFrameQualityValue );
-    cpi->pb.LastFrameQualityValue = cpi->pb.ThisFrameQualityValue;
-  }
-}
-
-/* Copy the VFRAGPIXELS x HFRAGPIXELS pixels of fragment Frag from
-   cpi->yuv1ptr into cpi->ConvDestBuffer, stepping Stride bytes per row. */
-static void CopyFragmentBack(CP_INSTANCE *cpi, ogg_uint32_t Frag,
-                             ogg_uint32_t Stride){
-  const ogg_uint32_t FirstPixel = cpi->pb.pixel_index_table[Frag];
-  unsigned char *From = &cpi->yuv1ptr[FirstPixel];
-  unsigned char *To = &cpi->ConvDestBuffer[FirstPixel];
-  ogg_uint32_t Row;
-
-  for ( Row = 0; Row < VFRAGPIXELS; Row++ ) {
-    memcpy( To, From, HFRAGPIXELS);
-    From += Stride;
-    To += Stride;
-  }
-}
-
-/* For every fragment flagged in cpi->extra_fragments, copy its pixels back
-   from yuv1ptr into ConvDestBuffer.  Luma fragments come first and use the
-   full frame width as row step; the U and V fragments that follow use half
-   of it. */
-void CopyBackExtraFrags(CP_INSTANCE *cpi){
-  const ogg_uint32_t LumaStride = cpi->pb.info.width;
-  const ogg_uint32_t ChromaStride = cpi->pb.info.width / 2;
-  const ogg_uint32_t LumaEnd = cpi->pb.YPlaneFragments;
-  const ogg_uint32_t AllEnd =
-    (cpi->pb.YPlaneFragments + (2 * cpi->pb.UVPlaneFragments));
-  ogg_uint32_t Frag;
-
-  for ( Frag = 0; Frag < AllEnd; Frag++ ) {
-    /* Only fragments that received an extra update are of interest. */
-    if ( !cpi->extra_fragments[Frag] )
-      continue;
-
-    if ( Frag < LumaEnd )
-      CopyFragmentBack( cpi, Frag, LumaStride );
-    else
-      CopyFragmentBack( cpi, Frag, ChromaStride );
-  }
-}
-

+ 0 - 89
Engine/lib/libtheora/lib/enc/pb.c

@@ -1,89 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: pb.c 14372 2008-01-05 23:52:28Z giles $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include <string.h>
-#include "codec_internal.h"
-
-void ClearTmpBuffers(PB_INSTANCE * pbi){
-  /* Free the four temporary work buffers owned by pbi and reset each
-     pointer to 0 so the instance can be cleared or re-initialised again. */
-
-  if(pbi->ReconDataBuffer){
-    _ogg_free(pbi->ReconDataBuffer);
-    pbi->ReconDataBuffer = 0;
-  }
-
-  if(pbi->DequantBuffer){
-    _ogg_free(pbi->DequantBuffer);
-    pbi->DequantBuffer = 0;
-  }
-
-  if(pbi->TmpDataBuffer){
-    _ogg_free(pbi->TmpDataBuffer);
-    pbi->TmpDataBuffer = 0;
-  }
-
-  if(pbi->TmpReconBuffer){
-    _ogg_free(pbi->TmpReconBuffer);
-    pbi->TmpReconBuffer = 0;
-  }
-}
-
-void InitTmpBuffers(PB_INSTANCE * pbi){
-  /* (Re)allocate the four temporary work buffers of pbi, each with room
-     for 64 elements of the buffer's own element type.  Buffers left over
-     from an earlier call are released first.
-     NOTE(review): allocation results are not checked here. */
-  enum { BUFFER_ELEMS = 64 };
-
-  /* Drop whatever is currently attached. */
-  ClearTmpBuffers(pbi);
-
-  pbi->ReconDataBuffer =
-    _ogg_malloc(BUFFER_ELEMS * sizeof(*pbi->ReconDataBuffer));
-  pbi->DequantBuffer =
-    _ogg_malloc(BUFFER_ELEMS * sizeof(*pbi->DequantBuffer));
-  pbi->TmpDataBuffer =
-    _ogg_malloc(BUFFER_ELEMS * sizeof(*pbi->TmpDataBuffer));
-  pbi->TmpReconBuffer =
-    _ogg_malloc(BUFFER_ELEMS * sizeof(*pbi->TmpReconBuffer));
-}
-
-/* Release everything owned by a playback instance: the temporary work
-   buffers and the bit-packing buffer.  A NULL pbi is ignored.  The opb
-   pointer is reset after it is freed so that clearing the same instance
-   twice cannot free the buffer a second time. */
-void ClearPBInstance(PB_INSTANCE *pbi){
-  if(pbi){
-    ClearTmpBuffers(pbi);
-    if (pbi->opb) {
-      _ogg_free(pbi->opb);
-      pbi->opb = 0;
-    }
-  }
-}
-
-void InitPBInstance(PB_INSTANCE *pbi){
-  /* Bring a playback instance to its initial state: zero the structure,
-     allocate the temporary buffers and the oggpack_buffer, then set the
-     few members whose starting value is not all-zero bits. */
-  int m;
-
-  /* Everything starts out as 0. */
-  memset(pbi, 0, sizeof(*pbi));
-
-  InitTmpBuffers(pbi);
-
-  /* Storage for the bit packer. */
-  pbi->opb = _ogg_malloc(sizeof(oggpack_buffer));
-
-  /* Each modifier pointer refers to element 255 of its Modifier row. */
-  for(m = 0; m < 4; m++)
-    pbi->ModifierPointer[m] = &pbi->Modifier[m][255];
-
-  pbi->DecoderErrorCode = 0;
-  pbi->KeyFrameType = DCT_KEY_FRAME;
-  pbi->FramesHaveBeenSkipped = 0;
-}

+ 0 - 951
Engine/lib/libtheora/lib/enc/pp.c

@@ -1,951 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: pp.c 15057 2008-06-22 21:07:32Z xiphmont $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include <string.h>
-#include "codec_internal.h"
-#include "pp.h"
-#include "dsp.h"
-
-/* Larger / smaller of two values.  Every argument is parenthesised so that
-   operands containing lower-precedence operators (e.g. MAX(x & 1, y))
-   expand correctly.  Arguments are still evaluated more than once, so do
-   not pass expressions with side effects. */
-#define MAX(a, b) (((a)>(b))?(a):(b))
-#define MIN(a, b) (((a)<(b))?(a):(b))
-#define PP_QUALITY_THRESH   49
-/* Per-Q-index substitute weight: the deringing filters store this value in
-   place of a neighbour weight whose computed value falls below -64. */
-static const ogg_int32_t SharpenModifier[ Q_TABLE_SIZE ] =
-{  -12, -11, -10, -10,  -9,  -9,  -9,  -9,
-   -6,  -6,  -6,  -6,  -6,  -6,  -6,  -6,
-   -4,  -4,  -4,  -4,  -4,  -4,  -4,  -4,
-   -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,
-   -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,
-   0,  0,  0,  0,  0,  0,  0,  0,
-   0,  0,  0,  0,  0,  0,  0,  0,
-   0,  0,  0,  0,  0,  0,  0,  0
-};
-/* Scale value for each Q index; also exposed as DeringModifierV1 below,
-   which DeringFrame hands to the deringing filters as QuantScale. */
-static const ogg_uint32_t DcQuantScaleV1[ Q_TABLE_SIZE ] = {
-  22, 20, 19, 18, 17, 17, 16, 16,
-  15, 15, 14, 14, 13, 13, 12, 12,
-  11, 11, 10, 10, 9,  9,  9,  8,
-  8,  8,  7,  7,  7,  6,  6,  6,
-  6,  5,  5,  5,  5,  4,  4,  4,
-  4,  4,  3,  3,  3,  3,  3,  3,
-  3,  2,  2,  2,  2,  2,  2,  2,
-  2,  1,  1,  1,  1,  1,  1,  1
-};
-/* Alias: the deringing modifier table is the DC quant scale table. */
-static const ogg_uint32_t * const DeringModifierV1=DcQuantScaleV1;
-
-static void PClearFrameInfo(PP_INSTANCE * ppi){
-  int i;
-
-  if(ppi->ScanPixelIndexTable) _ogg_free(ppi->ScanPixelIndexTable);
-  ppi->ScanPixelIndexTable=0;
-
-  if(ppi->ScanDisplayFragments) _ogg_free(ppi->ScanDisplayFragments);
-  ppi->ScanDisplayFragments=0;
-
-  for(i = 0 ; i < MAX_PREV_FRAMES ; i ++)
-    if(ppi->PrevFragments[i]){
-      _ogg_free(ppi->PrevFragments[i]);
-      ppi->PrevFragments[i]=0;
-    }
-
-  if(ppi->FragScores) _ogg_free(ppi->FragScores);
-  ppi->FragScores=0;
-
-  if(ppi->SameGreyDirPixels) _ogg_free(ppi->SameGreyDirPixels);
-  ppi->SameGreyDirPixels=0;
-
-  if(ppi->FragDiffPixels) _ogg_free(ppi->FragDiffPixels);
-  ppi->FragDiffPixels=0;
-
-  if(ppi->BarBlockMap) _ogg_free(ppi->BarBlockMap);
-  ppi->BarBlockMap=0;
-
-  if(ppi->TmpCodedMap) _ogg_free(ppi->TmpCodedMap);
-  ppi->TmpCodedMap=0;
-
-  if(ppi->RowChangedPixels) _ogg_free(ppi->RowChangedPixels);
-  ppi->RowChangedPixels=0;
-
-  if(ppi->PixelScores) _ogg_free(ppi->PixelScores);
-  ppi->PixelScores=0;
-
-  if(ppi->PixelChangedMap) _ogg_free(ppi->PixelChangedMap);
-  ppi->PixelChangedMap=0;
-
-  if(ppi->ChLocals) _ogg_free(ppi->ChLocals);
-  ppi->ChLocals=0;
-
-  if(ppi->yuv_differences) _ogg_free(ppi->yuv_differences);
-  ppi->yuv_differences=0;
-
-}
-
-/*
- * (Re)allocate all per-frame buffers of a pre-processor instance.
- *
- * Buffers from an earlier call are released first.  Sizes are derived from
- * ppi->ScanFrameFragments, ppi->ScanHFragments and the frame dimensions in
- * ppi->ScanConfig, which must already be set by the caller.  Every buffer
- * is sized with the element type of the member it is assigned to.
- *
- * NOTE(review): allocation results are not checked; the function returns
- * void, so a failure would have to be detected by the caller.
- */
-void PInitFrameInfo(PP_INSTANCE * ppi){
-  int i;
-  PClearFrameInfo(ppi);
-
-  ppi->ScanPixelIndexTable =
-    _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->ScanPixelIndexTable));
-
-  ppi->ScanDisplayFragments =
-    _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->ScanDisplayFragments));
-
-  /* One map per remembered frame.  The element size is that of what each
-     PrevFragments[i] points at; the previous sizeof(*ppi->PrevFragments)
-     measured the pointer stored in the array instead. */
-  for(i = 0 ; i < MAX_PREV_FRAMES ; i ++)
-    ppi->PrevFragments[i] =
-      _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->PrevFragments[i]));
-
-  ppi->FragScores =
-    _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->FragScores));
-
-  ppi->SameGreyDirPixels =
-    _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->SameGreyDirPixels));
-
-  /* Sized with its own element type (was sized with FragScores' type). */
-  ppi->FragDiffPixels =
-    _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->FragDiffPixels));
-
-  ppi->BarBlockMap=
-    _ogg_malloc(3 * ppi->ScanHFragments*sizeof(*ppi->BarBlockMap));
-
-  ppi->TmpCodedMap =
-    _ogg_malloc(ppi->ScanHFragments*sizeof(*ppi->TmpCodedMap));
-
-  ppi->RowChangedPixels =
-    _ogg_malloc(3 * ppi->ScanConfig.VideoFrameHeight*
-                sizeof(*ppi->RowChangedPixels));
-
-  /* The remaining buffers hold a fixed number of frame-width rows. */
-  ppi->PixelScores =
-    _ogg_malloc(ppi->ScanConfig.VideoFrameWidth*
-                sizeof(*ppi->PixelScores) * PSCORE_CB_ROWS);
-
-  ppi->PixelChangedMap =
-    _ogg_malloc(ppi->ScanConfig.VideoFrameWidth*
-                sizeof(*ppi->PixelChangedMap) * PMAP_CB_ROWS);
-
-  ppi->ChLocals =
-    _ogg_malloc(ppi->ScanConfig.VideoFrameWidth*
-                sizeof(*ppi->ChLocals) * CHLOCALS_CB_ROWS);
-
-  ppi->yuv_differences =
-    _ogg_malloc(ppi->ScanConfig.VideoFrameWidth*
-                sizeof(*ppi->yuv_differences) * YDIFF_CB_ROWS);
-}
-/* Release all per-frame buffers owned by ppi (delegates to
-   PClearFrameInfo); the PP_INSTANCE structure itself is not freed here. */
-void ClearPPInstance(PP_INSTANCE *ppi){
-  PClearFrameInfo(ppi);
-}
-
-
-void InitPPInstance(PP_INSTANCE *ppi, DspFunctions *funcs){
-  /* Reset a pre-processor instance to its defaults: zero the structure,
-     take a private copy of the DSP function table, then fill in the
-     tuning parameters that are not zero. */
-
-  memset(ppi,0,sizeof(*ppi));
-  memcpy(&ppi->dsp, funcs, sizeof(DspFunctions));
-
-  /* Fragment geometry: 8x8 pixel blocks. */
-  ppi->VFragPixels = 8;
-  ppi->HFragPixels = 8;
-
-  /* Number of frames considered, current one included, so 1 would mean
-     "no effect".  Must not exceed MAX_PREV_FRAMES. */
-  ppi->PrevFrameLimit = 3;
-
-  /* Scan control thresholds and levels. */
-  ppi->SRFGreyThresh = 4;
-  ppi->SRFColThresh = 5;
-  ppi->SgcLevelThresh = 3;
-  ppi->SuvcLevelThresh = 4;
-  ppi->NoiseSupLevel = 3;
-
-  /* S.A.D. breakout controls (defaults for 8x8 blocks). */
-  ppi->GrpLowSadThresh = 10;
-  ppi->GrpHighSadThresh = 64;
-  ppi->PrimaryBlockThreshold = 5;
-  ppi->SgcThresh = 16;
-
-  /* Correction factors applied for the UV planes. */
-  ppi->UVBlockThreshCorrection = 1.25;
-  ppi->UVSgcCorrection = 1.5;
-
-  ppi->MaxLineSearchLen = MAX_SEARCH_LINE_LEN;
-}
-/*
- * Strong deringing filter for one 8x8 block.
- *
- * Each output pixel is a weighted blend of the source pixel and its four
- * direct neighbours.  A neighbour's weight is 32 + QValue minus the
- * absolute difference across that edge, clamped to [0, High] with
- * High = min(3 * QValue, 32); a raw weight below -64 is replaced by
- * SharpenModifier[FragQIndex].
- *
- * SrcPtr     - top-left pixel of the source block; the row above, the row
- *              below and the columns left and right of the block are read
- *              too.
- * DstPtr     - top-left pixel of the destination block.
- * Pitch      - row stride, used for both source and destination.
- * FragQIndex - Q index of the block; selects the QuantScale and
- *              SharpenModifier entries.
- * QuantScale - table that supplies QValue.
- */
-static void DeringBlockStrong(unsigned char *SrcPtr,
-                              unsigned char *DstPtr,
-                              ogg_int32_t Pitch,
-                              ogg_uint32_t FragQIndex,
-                              const ogg_uint32_t *QuantScale){
-
-  ogg_int16_t UDMod[72];   /* 9 rows x 8 columns: weights across horizontal edges */
-  ogg_int16_t LRMod[72];   /* 8 rows x 9 columns: weights across vertical edges */
-  unsigned int j,k,l;
-  const unsigned char * Src;
-  unsigned int QValue = QuantScale[FragQIndex];
-
-  unsigned char p;
-  unsigned char pl;
-  unsigned char pr;
-  unsigned char pu;
-  unsigned char pd;
-
-  int  al;
-  int  ar;
-  int  au;
-  int  ad;
-
-  int  atot;
-  int  B;
-  int  newVal;
-
-  const unsigned char *curRow = SrcPtr - 1; /* avoid negative array indexes */
-  unsigned char *dstRow = DstPtr;
-  const unsigned char *lastRow = SrcPtr-Pitch;
-  const unsigned char *nextRow = SrcPtr+Pitch;
-
-  unsigned int rowOffset = 0;
-  unsigned int round = (1<<6);   /* half of the 1<<7 divisor applied below */
-
-  int High;
-  int Low;
-  int TmpMod;
-
-  int Sharpen = SharpenModifier[FragQIndex];
-  High = 3 * QValue;
-  if(High>32)High=32;
-  Low = 0;
-
-
-  /* Initialize the Mod Data: UDMod[k*8+j] is the weight between the pixel
-     in row k-1 and the pixel in row k of column j. */
-  Src = SrcPtr-Pitch;
-  for(k=0;k<9;k++){
-    for(j=0;j<8;j++){
-
-      TmpMod = 32 + QValue - (abs(Src[j+Pitch]-Src[j]));
-
-      if(TmpMod< -64)
-        TmpMod = Sharpen;
-
-      else if(TmpMod<Low)
-        TmpMod = Low;
-
-      else if(TmpMod>High)
-        TmpMod = High;
-
-      UDMod[k*8+j] = (ogg_int16_t)TmpMod;
-    }
-    Src +=Pitch;
-  }
-
-  Src = SrcPtr-1;
-  /* LRMod[k*9+j] is the weight between columns j-1 and j of row k. */
-  for(k=0;k<8;k++){
-    for(j=0;j<9;j++){
-      TmpMod = 32 + QValue - (abs(Src[j+1]-Src[j]));
-
-      if(TmpMod< -64 )
-        TmpMod = Sharpen;
-
-      else if(TmpMod<0)
-        TmpMod = Low;
-
-      else if(TmpMod>High)
-        TmpMod = High;
-
-      LRMod[k*9+j] = (ogg_int16_t)TmpMod;
-    }
-    Src+=Pitch;
-  }
-
-  for(k=0;k<8;k++){
-    /* The original note here says that, when this function is called with
-     the same buffer for source and destination, an intermediate buffer
-     holds the eight pixel values of a row before they are written, to
-     keep the C and MMX versions consistent (i.e. overwriting the source
-     in that special case).
-     NOTE(review): no intermediate buffer is present in this loop; each
-     result is stored straight into dstRow. */
-    for(l=0;l<8;l++){
-
-      atot = 128;
-      B = round;
-      p = curRow[ rowOffset +l +1];
-
-      pl = curRow[ rowOffset +l];
-      al = LRMod[k*9+l];
-      atot -= al;
-      B += al * pl;
-
-      pu = lastRow[ rowOffset +l];
-      au = UDMod[k*8+l];
-      atot -= au;
-      B += au * pu;
-
-      pd = nextRow[ rowOffset +l];
-      ad = UDMod[(k+1)*8+l];
-      atot -= ad;
-      B += ad * pd;
-
-      pr = curRow[ rowOffset +l+2];
-      ar = LRMod[k*9+l+1];
-      atot -= ar;
-      B += ar * pr;
-
-      /* atot is what remains of 128 after the four neighbour weights. */
-      newVal = ( atot * p + B) >> 7;
-
-      dstRow[ rowOffset +l]= clamp255( newVal );
-    }
-    rowOffset += Pitch;
-  }
-}
-/*
- * Weak deringing filter for one 8x8 block.
- *
- * Same structure as DeringBlockStrong, with two differences visible in the
- * weight computation: the absolute edge difference is doubled before it is
- * subtracted from 32 + QValue, and the upper clamp High is
- * min(3 * QValue, 24) instead of min(3 * QValue, 32).
- *
- * SrcPtr     - top-left pixel of the source block; the row above, the row
- *              below and the columns left and right of the block are read
- *              too.
- * DstPtr     - top-left pixel of the destination block.
- * Pitch      - row stride, used for both source and destination.
- * FragQIndex - Q index of the block; selects the QuantScale and
- *              SharpenModifier entries.
- * QuantScale - table that supplies QValue.
- */
-static void DeringBlockWeak(unsigned char *SrcPtr,
-                            unsigned char *DstPtr,
-                            ogg_int32_t Pitch,
-                            ogg_uint32_t FragQIndex,
-                            const ogg_uint32_t *QuantScale){
-
-  ogg_int16_t UDMod[72];   /* 9 rows x 8 columns: weights across horizontal edges */
-  ogg_int16_t LRMod[72];   /* 8 rows x 9 columns: weights across vertical edges */
-  unsigned int j,k;
-  const unsigned char * Src;
-  unsigned int QValue = QuantScale[FragQIndex];
-
-  unsigned char p;
-  unsigned char pl;
-  unsigned char pr;
-  unsigned char pu;
-  unsigned char pd;
-
-  int  al;
-  int  ar;
-  int  au;
-  int  ad;
-
-  int  atot;
-  int  B;
-  int  newVal;
-
-  const unsigned char *curRow = SrcPtr-1;   /* one pixel left, so indexes stay non-negative */
-  unsigned char *dstRow = DstPtr;
-  const unsigned char *lastRow = SrcPtr-Pitch;
-  const unsigned char *nextRow = SrcPtr+Pitch;
-
-  unsigned int rowOffset = 0;
-  unsigned int round = (1<<6);   /* half of the 1<<7 divisor applied below */
-
-  int High;
-  int Low;
-  int TmpMod;
-  int Sharpen = SharpenModifier[FragQIndex];
-
-  High = 3 * QValue;
-  if(High>24)
-    High=24;
-  Low = 0 ;
-
-  /* Initialize the Mod Data: UDMod[k*8+j] is the weight between the pixel
-     in row k-1 and the pixel in row k of column j. */
-  Src=SrcPtr-Pitch;
-  for(k=0;k<9;k++) {
-    for(j=0;j<8;j++) {
-
-      TmpMod = 32 + QValue - 2*(abs(Src[j+Pitch]-Src[j]));
-
-      if(TmpMod< -64)
-        TmpMod = Sharpen;
-
-      else if(TmpMod<Low)
-        TmpMod = Low;
-
-            else if(TmpMod>High)
-              TmpMod = High;
-
-      UDMod[k*8+j] = (ogg_int16_t)TmpMod;
-    }
-    Src +=Pitch;
-  }
-
-  Src = SrcPtr-1;
-  /* LRMod[k*9+j] is the weight between columns j-1 and j of row k. */
-  for(k=0;k<8;k++){
-    for(j=0;j<9;j++){
-      TmpMod = 32 + QValue - 2*(abs(Src[j+1]-Src[j]));
-
-      if(TmpMod< -64 )
-        TmpMod = Sharpen;
-
-      else if(TmpMod<Low)
-        TmpMod = Low;
-
-      else if(TmpMod>High)
-        TmpMod = High;
-
-      LRMod[k*9+j] = (ogg_int16_t)TmpMod;
-    }
-    Src+=Pitch;
-  }
-  /* Blend every pixel with its left, upper, lower and right neighbour. */
-  for(k=0;k<8;k++) {
-    for(j=0;j<8;j++){
-      atot = 128;
-      B = round;
-      p = curRow[ rowOffset +j+1];
-
-      pl = curRow[ rowOffset +j];
-      al = LRMod[k*9+j];
-      atot -= al;
-      B += al * pl;
-
-      pu = lastRow[ rowOffset +j];
-      au = UDMod[k*8+j];
-      atot -= au;
-      B += au * pu;
-
-      pd = nextRow[ rowOffset +j];
-      ad = UDMod[(k+1)*8+j];
-      atot -= ad;
-      B += ad * pd;
-
-      pr = curRow[ rowOffset +j+2];
-      ar = LRMod[k*9+j+1];
-      atot -= ar;
-      B += ar * pr;
-
-      /* atot is what remains of 128 after the four neighbour weights. */
-      newVal = ( atot * p + B) >> 7;
-
-      dstRow[ rowOffset +j] = clamp255( newVal );
-    }
-
-    rowOffset += Pitch;
-  }
-}
-/* Apply the deringing stage to a whole frame, one 8x8 block at a time:
- * the Y plane first, then U, then V.
- *
- * pbi: decoder state.  Read here: HFragments, VFragments, YStride, the
- *      ReconYDataOffset/ReconUDataOffset/ReconVDataOffset plane offsets,
- *      PostProcessingLevel, FragQIndex[] and FragmentVariances[]; pbi->dsp
- *      is handed to dsp_copy8x8.
- * Src: frame buffer the blocks are read from.
- * Dst: frame buffer the blocks are written to.  PostProcess() passes the
- *      same buffer for both.
- *
- * Each block's FragmentVariances[] entry selects its treatment: plain copy,
- * DeringBlockWeak, or one or three calls of DeringBlockStrong.  Block is a
- * running fragment index that continues from the Y plane into U and V.
- */
-static void DeringFrame(PB_INSTANCE *pbi,
-                        unsigned char *Src, unsigned char *Dst){
-  ogg_uint32_t  col,row;
-  unsigned char  *SrcPtr;
-  unsigned char  *DestPtr;
-  ogg_uint32_t BlocksAcross,BlocksDown;
-  const ogg_uint32_t *QuantScale;
-  ogg_uint32_t Block;
-  ogg_uint32_t LineLength;
-
-  ogg_int32_t Thresh1,Thresh2,Thresh3,Thresh4;
-
-  Thresh1 = 384;
-  Thresh2 = 4 * Thresh1;      /* 1536 */
-  Thresh3 = 5 * Thresh2/4;    /* 1920 */
-  Thresh4 = 5 * Thresh2/2;    /* 3840 */
-
-  QuantScale = DeringModifierV1;
-
-  BlocksAcross = pbi->HFragments;
-  BlocksDown = pbi->VFragments;
-
-  SrcPtr = Src + pbi->ReconYDataOffset;
-  DestPtr = Dst + pbi->ReconYDataOffset;
-  LineLength = pbi->YStride;
-
-  Block = 0;
-
-  for ( row = 0 ; row < BlocksDown; row ++){
-    for (col = 0; col < BlocksAcross; col ++){
-      ogg_uint32_t Quality = pbi->FragQIndex[Block];
-      ogg_int32_t Variance = pbi->FragmentVariances[Block];
-
-      if( pbi->PostProcessingLevel >5 && Variance > Thresh3 ){
-        DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
-                          LineLength,Quality,QuantScale);
-
-        if( (col > 0 &&
-             pbi->FragmentVariances[Block-1] > Thresh4 ) ||
-            (col + 1 < BlocksAcross &&
-             pbi->FragmentVariances[Block+1] > Thresh4 ) ||
-            (row + 1 < BlocksDown &&
-             pbi->FragmentVariances[Block+BlocksAcross] > Thresh4) ||
-            (row > 0 &&
-             pbi->FragmentVariances[Block-BlocksAcross] > Thresh4) ){
-          /* a left/right/lower/upper neighbour exceeds Thresh4: two more strong passes */
-          DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
-                            LineLength,Quality,QuantScale);
-          DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
-                            LineLength,Quality,QuantScale);
-        }
-      } else if(Variance > Thresh2 ) {
-
-        DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
-                          LineLength,Quality,QuantScale);
-      } else if(Variance > Thresh1 ) {
-
-        DeringBlockWeak(SrcPtr + 8 * col, DestPtr + 8 * col,
-                        LineLength,Quality,QuantScale);
-
-      } else {
-        /* variance at or below Thresh1: the block is copied through unfiltered */
-        dsp_copy8x8(pbi->dsp, SrcPtr + 8 * col, DestPtr + 8 * col, LineLength);
-
-      }
-
-      ++Block;
-
-    }
-    SrcPtr += 8 * LineLength;
-    DestPtr += 8 * LineLength;
-  }
-
-  /* Then U.  Both chroma planes are walked with half the luma block counts
-   * and half the luma stride; pbi->UVStride is not consulted here.  Unlike
-   * the Y plane there is no neighbour test: with PostProcessingLevel above 5
-   * a block over Thresh4 gets three strong passes straight away. */
-
-  BlocksAcross /= 2;
-  BlocksDown /= 2;
-  LineLength /= 2;
-
-  SrcPtr = Src + pbi->ReconUDataOffset;
-  DestPtr = Dst + pbi->ReconUDataOffset;
-  for ( row = 0 ; row < BlocksDown; row ++) {
-    for (col = 0; col < BlocksAcross; col ++) {
-      ogg_uint32_t Quality = pbi->FragQIndex[Block];
-      ogg_int32_t Variance = pbi->FragmentVariances[Block];
-
-      if( pbi->PostProcessingLevel >5 && Variance > Thresh4 ) {
-        DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
-                          LineLength,Quality,QuantScale);
-        DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
-                          LineLength,Quality,QuantScale);
-        DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
-                          LineLength,Quality,QuantScale);
-
-      }else if(Variance > Thresh2 ){
-        DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
-                          LineLength,Quality,QuantScale);
-      }else if(Variance > Thresh1 ){
-        DeringBlockWeak(SrcPtr + 8 * col, DestPtr + 8 * col,
-                        LineLength,Quality,QuantScale);
-      }else{
-        dsp_copy8x8(pbi->dsp, SrcPtr + 8 * col, DestPtr + 8 * col, LineLength);
-      }
-
-      ++Block;
-
-    }
-    SrcPtr += 8 * LineLength;
-    DestPtr += 8 * LineLength;
-  }
-
-  /* Then V: same block counts, stride and thresholds as the U pass above */
-  SrcPtr = Src + pbi->ReconVDataOffset;
-  DestPtr = Dst + pbi->ReconVDataOffset;
-
-  for ( row = 0 ; row < BlocksDown; row ++){
-    for (col = 0; col < BlocksAcross; col ++){
-
-      ogg_uint32_t Quality = pbi->FragQIndex[Block];
-      ogg_int32_t Variance = pbi->FragmentVariances[Block];
-
-
-      if( pbi->PostProcessingLevel >5 && Variance > Thresh4 ) {
-        DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
-                          LineLength,Quality,QuantScale);
-        DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
-                          LineLength,Quality,QuantScale);
-        DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
-                          LineLength,Quality,QuantScale);
-
-      }else if(Variance > Thresh2 ){
-        DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
-                          LineLength,Quality,QuantScale);
-      }else if(Variance > Thresh1 ){
-        DeringBlockWeak(SrcPtr + 8 * col, DestPtr + 8 * col,
-                        LineLength,Quality,QuantScale);
-      }else{
-        dsp_copy8x8(pbi->dsp, SrcPtr + 8 * col, DestPtr + 8 * col, LineLength);
-      }
-
-      ++Block;
-
-    }
-    SrcPtr += 8 * LineLength;
-    DestPtr += 8 * LineLength;
-
-  }
-
-}
-/* Stamp the current frame's quality index onto every fragment flagged in
- * pbi->display_fragments[]; pbi->FragQIndex[] entries of unflagged
- * fragments keep whatever value they had before.
- *
- * pbi: decoder state (reads FrameQIndex, UnitFragments and
- *      display_fragments[]; writes FragQIndex[]).
- */
-void UpdateFragQIndex(PB_INSTANCE *pbi){
-
-  ogg_uint32_t  ThisFrameQIndex;
-  ogg_uint32_t  i;
-
-  /* Fetch this frame's quality index */
-  ThisFrameQIndex = pbi->FrameQIndex;
-
-
-  /* Only reset the entries of fragments that are coded, i.e. flagged in
-     display_fragments[] (the original note gives the reason as "it is not
-     a key frame") */
-  for( i = 0; i < pbi->UnitFragments; i++  )
-    if( pbi->display_fragments[i])
-      pbi->FragQIndex[i] = ThisFrameQIndex;
-
-}
-/* Deblock one band of a plane: the horizontal block edge that lies directly
- * above the row SrcPtr/DesPtr point at, and then the vertical block edges
- * inside the fragment row above that edge.
- *
- * pbi:           decoder state; FragQIndex[] is read and FragmentVariances[]
- *                is accumulated into.
- * SrcPtr/DesPtr: matching positions in the source and destination planes.
- *                Relative to them the code reads source rows -5..+4 and
- *                touches destination rows -8..+3.
- * PlaneLineStep: distance between successive rows of the plane.
- * FragsAcross:   number of fragments in one row of the plane.
- * StartFrag:     index of the first fragment of the row above the edge;
- *                StartFrag+FragsAcross addresses the row below it.
- * QuantScale:    table indexed by a fragment's FragQIndex[] value, giving
- *                the QStep used for the filter thresholds.
- */
-static void DeblockLoopFilteredBand(PB_INSTANCE *pbi,
-                             unsigned char *SrcPtr,
-                             unsigned char *DesPtr,
-                             ogg_uint32_t PlaneLineStep,
-                             ogg_uint32_t FragsAcross,
-                             ogg_uint32_t StartFrag,
-                             const ogg_uint32_t *QuantScale){
-  ogg_uint32_t j,k;
-  ogg_uint32_t CurrentFrag=StartFrag;
-  ogg_int32_t QStep;
-  ogg_int32_t FLimit;
-  unsigned char *Src, *Des;
-  ogg_int32_t  x[10];
-  ogg_int32_t  Sum1, Sum2;
-
-  while(CurrentFrag < StartFrag + FragsAcross){
-    /* Horizontal edge: Src starts 5 rows above it, Des 4 rows above it */
-    Src=SrcPtr+8*(CurrentFrag-StartFrag)-PlaneLineStep*5;
-    Des=DesPtr+8*(CurrentFrag-StartFrag)-PlaneLineStep*4;
-
-    QStep = QuantScale[pbi->FragQIndex[CurrentFrag+FragsAcross]];
-    FLimit = ( QStep * 3 ) >> 2;
-    /* one pixel column per pass; x[0..4] lie above the edge, x[5..9] below */
-    for( j=0; j<8 ; j++){
-      x[0] = Src[0];
-      x[1] = Src[PlaneLineStep];
-      x[2] = Src[PlaneLineStep*2];
-      x[3] = Src[PlaneLineStep*3];
-      x[4] = Src[PlaneLineStep*4];
-      x[5] = Src[PlaneLineStep*5];
-      x[6] = Src[PlaneLineStep*6];
-      x[7] = Src[PlaneLineStep*7];
-      x[8] = Src[PlaneLineStep*8];
-      x[9] = Src[PlaneLineStep*9];
-
-      Sum1=Sum2=0;
-
-      for(k=1;k<=4;k++){
-        Sum1 += abs(x[k]-x[k-1]);
-        Sum2 += abs(x[k+4]-x[k+5]);
-      }
-      /* each sum is capped at 255 before joining the per-fragment totals */
-      pbi->FragmentVariances[CurrentFrag] +=((Sum1>255)?255:Sum1);
-      pbi->FragmentVariances[CurrentFrag + FragsAcross] += ((Sum2>255)?255:Sum2);
-      /* filter only if both sides are flat and the step across the edge is below QStep */
-      if( Sum1 < FLimit &&
-          Sum2 < FLimit &&
-          (x[5] - x[4]) < QStep &&
-          (x[4] - x[5]) < QStep ){
-
-        /* low pass filtering (LPF7: 1 1 1 2 1 1 1); x[0] and x[9] are
-           repeated where the 7-tap window runs past the 10 samples */
-        Des[0              ] = (x[0] + x[0] +x[0] + x[1] * 2 +
-                                x[2] + x[3] +x[4] + 4) >> 3;
-        Des[PlaneLineStep  ] = (x[0] + x[0] +x[1] + x[2] * 2 +
-                                x[3] + x[4] +x[5] + 4) >> 3;
-        Des[PlaneLineStep*2] = (x[0] + x[1] +x[2] + x[3] * 2 +
-                                x[4] + x[5] +x[6] + 4) >> 3;
-        Des[PlaneLineStep*3] = (x[1] + x[2] +x[3] + x[4] * 2 +
-                                x[5] + x[6] +x[7] + 4) >> 3;
-        Des[PlaneLineStep*4] = (x[2] + x[3] +x[4] + x[5] * 2 +
-                                x[6] + x[7] +x[8] + 4) >> 3;
-        Des[PlaneLineStep*5] = (x[3] + x[4] +x[5] + x[6] * 2 +
-                                x[7] + x[8] +x[9] + 4) >> 3;
-        Des[PlaneLineStep*6] = (x[4] + x[5] +x[6] + x[7] * 2 +
-                                x[8] + x[9] +x[9] + 4) >> 3;
-        Des[PlaneLineStep*7] = (x[5] + x[6] +x[7] + x[8] * 2 +
-                                x[9] + x[9] +x[9] + 4) >> 3;
-
-      }else {
-        /* copy the pixels to destination */
-        Des[0              ]= (unsigned char)x[1];
-        Des[PlaneLineStep  ]= (unsigned char)x[2];
-        Des[PlaneLineStep*2]= (unsigned char)x[3];
-        Des[PlaneLineStep*3]= (unsigned char)x[4];
-        Des[PlaneLineStep*4]= (unsigned char)x[5];
-        Des[PlaneLineStep*5]= (unsigned char)x[6];
-        Des[PlaneLineStep*6]= (unsigned char)x[7];
-        Des[PlaneLineStep*7]= (unsigned char)x[8];
-      }
-      Src ++;
-      Des ++;
-    }
-
-
-    /* done with filtering the horizontal edge, now let's do the
-       vertical one */
-    /* skip the first one: the leftmost fragment has no left neighbour */
-    if(CurrentFrag==StartFrag)
-      CurrentFrag++;
-    else{
-      Des=DesPtr-8*PlaneLineStep+8*(CurrentFrag-StartFrag);
-      Src=Des-5;
-      Des-=4;
-      /* Src is derived from Des: this pass reads and rewrites the destination rows -8..-1 in place */
-      QStep = QuantScale[pbi->FragQIndex[CurrentFrag]];
-      FLimit = ( QStep * 3 ) >> 2;
-
-      for( j=0; j<8 ; j++){
-        x[0] = Src[0];
-        x[1] = Src[1];
-        x[2] = Src[2];
-        x[3] = Src[3];
-        x[4] = Src[4];
-        x[5] = Src[5];
-        x[6] = Src[6];
-        x[7] = Src[7];
-        x[8] = Src[8];
-        x[9] = Src[9];
-
-        Sum1=Sum2=0;
-
-        for(k=1;k<=4;k++){
-          Sum1 += abs(x[k]-x[k-1]);
-          Sum2 += abs(x[k+4]-x[k+5]);
-        }
-
-        pbi->FragmentVariances[CurrentFrag-1] += ((Sum1>255)?255:Sum1);
-        pbi->FragmentVariances[CurrentFrag] += ((Sum2>255)?255:Sum2);
-
-        if( Sum1 < FLimit &&
-            Sum2 < FLimit &&
-            (x[5] - x[4]) < QStep &&
-            (x[4] - x[5]) < QStep ){
-
-          /* low pass filtering (LPF7: 1 1 1 2 1 1 1) */
-          Des[0] = (x[0] + x[0] +x[0] + x[1] * 2 + x[2] + x[3] +x[4] + 4) >> 3;
-          Des[1] = (x[0] + x[0] +x[1] + x[2] * 2 + x[3] + x[4] +x[5] + 4) >> 3;
-          Des[2] = (x[0] + x[1] +x[2] + x[3] * 2 + x[4] + x[5] +x[6] + 4) >> 3;
-          Des[3] = (x[1] + x[2] +x[3] + x[4] * 2 + x[5] + x[6] +x[7] + 4) >> 3;
-          Des[4] = (x[2] + x[3] +x[4] + x[5] * 2 + x[6] + x[7] +x[8] + 4) >> 3;
-          Des[5] = (x[3] + x[4] +x[5] + x[6] * 2 + x[7] + x[8] +x[9] + 4) >> 3;
-          Des[6] = (x[4] + x[5] +x[6] + x[7] * 2 + x[8] + x[9] +x[9] + 4) >> 3;
-          Des[7] = (x[5] + x[6] +x[7] + x[8] * 2 + x[9] + x[9] +x[9] + 4) >> 3;
-        }   /* no else branch: pixels already in Des are left untouched */
-
-        Src += PlaneLineStep;
-        Des += PlaneLineStep;
-      }
-      CurrentFrag ++;
-    }
-  }
-}
-/* Filter only the vertical block edges of a single fragment row.
- *
- * For every pair of horizontally adjacent fragments the 10 pixels straddling
- * their shared edge are read from the source plane, row by row for 8 rows.
- * When the smoothness test passes, 8 filtered pixels are stored to the
- * destination plane; otherwise the destination is not written at all.
- *
- * pbi:           decoder state; FragQIndex[] is read and FragmentVariances[]
- *                is accumulated into.
- * SrcPtr/DesPtr: first pixel of the fragment row in the source and
- *                destination planes.
- * PlaneLineStep: distance between successive rows of the plane.
- * FragsAcross:   number of fragments in the row.
- * StartFrag:     index of the row's first fragment.
- * QuantScale:    table indexed by FragQIndex[] of the right-hand fragment.
- */
-static void DeblockVerticalEdgesInLoopFilteredBand(PB_INSTANCE *pbi,
-                                            unsigned char *SrcPtr,
-                                            unsigned char *DesPtr,
-                                            ogg_uint32_t PlaneLineStep,
-                                            ogg_uint32_t FragsAcross,
-                                            ogg_uint32_t StartFrag,
-                                            const ogg_uint32_t *QuantScale){
-  ogg_uint32_t j,k;
-  ogg_uint32_t CurrentFrag=StartFrag;
-  ogg_int32_t QStep;
-  ogg_int32_t FLimit;
-  unsigned char *Src, *Des;
-  ogg_int32_t  x[10];
-  ogg_int32_t  Sum1, Sum2;
-
-  while(CurrentFrag < StartFrag + FragsAcross-1) {
-    /* edge between CurrentFrag and CurrentFrag+1: 5 source pixels each side */
-    Src=SrcPtr+8*(CurrentFrag-StartFrag+1)-5;
-    Des=DesPtr+8*(CurrentFrag-StartFrag+1)-4;
-
-    QStep = QuantScale[pbi->FragQIndex[CurrentFrag+1]];
-    FLimit = ( QStep * 3)>>2 ;
-
-    for( j=0; j<8 ; j++){
-      x[0] = Src[0];
-      x[1] = Src[1];
-      x[2] = Src[2];
-      x[3] = Src[3];
-      x[4] = Src[4];
-      x[5] = Src[5];
-      x[6] = Src[6];
-      x[7] = Src[7];
-      x[8] = Src[8];
-      x[9] = Src[9];
-
-      Sum1=Sum2=0;
-
-      for(k=1;k<=4;k++){
-        Sum1 += abs(x[k]-x[k-1]);
-        Sum2 += abs(x[k+4]-x[k+5]);
-      }
-      /* capped at 255 per row, then added to the left and right fragment totals */
-      pbi->FragmentVariances[CurrentFrag] += ((Sum1>255)?255:Sum1);
-      pbi->FragmentVariances[CurrentFrag+1] += ((Sum2>255)?255:Sum2);
-
-
-      if( Sum1 < FLimit &&
-          Sum2 < FLimit &&
-          (x[5] - x[4]) < QStep &&
-          (x[4] - x[5]) < QStep ){
-
-        /* low pass filtering (LPF7: 1 1 1 2 1 1 1) */
-        Des[0] = (x[0] + x[0] +x[0] + x[1] * 2 + x[2] + x[3] +x[4] + 4) >> 3;
-        Des[1] = (x[0] + x[0] +x[1] + x[2] * 2 + x[3] + x[4] +x[5] + 4) >> 3;
-        Des[2] = (x[0] + x[1] +x[2] + x[3] * 2 + x[4] + x[5] +x[6] + 4) >> 3;
-        Des[3] = (x[1] + x[2] +x[3] + x[4] * 2 + x[5] + x[6] +x[7] + 4) >> 3;
-        Des[4] = (x[2] + x[3] +x[4] + x[5] * 2 + x[6] + x[7] +x[8] + 4) >> 3;
-        Des[5] = (x[3] + x[4] +x[5] + x[6] * 2 + x[7] + x[8] +x[9] + 4) >> 3;
-        Des[6] = (x[4] + x[5] +x[6] + x[7] * 2 + x[8] + x[9] +x[9] + 4) >> 3;
-        Des[7] = (x[5] + x[6] +x[7] + x[8] * 2 + x[9] + x[9] +x[9] + 4) >> 3;
-      }
-      Src +=PlaneLineStep;
-                Des +=PlaneLineStep;
-
-    }
-    CurrentFrag ++;
-  }
-}
-/* Deblock one colour plane from SourceBuffer into DestinationBuffer.
- *
- * pbi:               decoder state (strides, fragment counts, plane offsets).
- * SourceBuffer:      frame buffer that is read.
- * DestinationBuffer: frame buffer that receives the result.
- * Channel:           0 selects the Y plane, 1 the U plane, any other value
- *                    the V plane.
- *
- * Sequence: the top 4 rows are copied as they are; DeblockLoopFilteredBand is
- * then called once for each fragment row after the first, with the pointers
- * advanced 8 rows per call; finally the bottom 4 rows are copied and the
- * vertical edges of the last fragment row are filtered.
- */
-static void DeblockPlane(PB_INSTANCE *pbi,
-                  unsigned char *SourceBuffer,
-                  unsigned char *DestinationBuffer,
-                  ogg_uint32_t Channel ){
-
-  ogg_uint32_t i,k;
-  ogg_uint32_t PlaneLineStep=0;
-  ogg_uint32_t StartFrag =0;
-  ogg_uint32_t PixelIndex=0;
-  unsigned char * SrcPtr=0, * DesPtr=0;
-  ogg_uint32_t FragsAcross=0;
-  ogg_uint32_t FragsDown=0;
-  const ogg_uint32_t *QuantScale=0;
-
-  switch( Channel ){
-  case 0:
-    /* Get the parameters */
-    PlaneLineStep = pbi->YStride;
-    FragsAcross = pbi->HFragments;
-    FragsDown = pbi->VFragments;
-    StartFrag = 0;
-    PixelIndex = pbi->ReconYDataOffset;
-    SrcPtr = & SourceBuffer[PixelIndex];
-    DesPtr = & DestinationBuffer[PixelIndex];
-    break;
-
-  case 1:
-    /* Get the parameters; the U fragments are indexed after all Y fragments */
-    PlaneLineStep = pbi->UVStride;
-    FragsAcross = pbi->HFragments / 2;
-    FragsDown = pbi->VFragments / 2;
-    StartFrag = pbi->YPlaneFragments;
-
-    PixelIndex = pbi->ReconUDataOffset;
-    SrcPtr = & SourceBuffer[PixelIndex];
-    DesPtr = & DestinationBuffer[PixelIndex];
-    break;
-
-  default:
-    /* Get the parameters; the V fragments are indexed after the Y and U ones */
-    PlaneLineStep = pbi->UVStride;
-    FragsAcross = pbi->HFragments / 2;
-    FragsDown = pbi->VFragments / 2;
-    StartFrag =   pbi->YPlaneFragments + pbi->UVPlaneFragments;
-
-    PixelIndex = pbi->ReconVDataOffset;
-    SrcPtr = & SourceBuffer[PixelIndex];
-    DesPtr = & DestinationBuffer[PixelIndex];
-    break;
-  }
-
-  QuantScale = DcQuantScaleV1;
-  /* rows 0..3 lie above the first filtered edge and are copied unchanged */
-  for(i=0;i<4;i++)
-    memcpy(DesPtr+i*PlaneLineStep, SrcPtr+i*PlaneLineStep, PlaneLineStep);
-
-  k = 1;
-
-  while( k < FragsDown ){
-
-    SrcPtr += 8*PlaneLineStep;
-    DesPtr += 8*PlaneLineStep;
-
-    /* Filter both the horizontal and vertical block edges inside the band */
-    DeblockLoopFilteredBand(pbi, SrcPtr, DesPtr, PlaneLineStep,
-                            FragsAcross, StartFrag, QuantScale);
-
-    /* Move Pointers */
-    StartFrag += FragsAcross;
-
-    k ++;
-  }
-
-  /* The Last band: rows 4..7 of the final fragment row are copied as they
-     are, then only the vertical edges of that row are filtered */
-  for(i=0;i<4;i++)
-    memcpy(DesPtr+(i+4)*PlaneLineStep,
-           SrcPtr+(i+4)*PlaneLineStep,
-           PlaneLineStep);
-
-  DeblockVerticalEdgesInLoopFilteredBand(pbi,SrcPtr,DesPtr,PlaneLineStep,
-                                         FragsAcross,StartFrag,QuantScale);
-
-}
-/* Deblock a complete frame from SourceBuffer into DestinationBuffer.
- *
- * Zeroes pbi->FragmentVariances[] (pbi->UnitFragments entries), refreshes
- * pbi->FragQIndex[] through UpdateFragQIndex(), then runs DeblockPlane() on
- * channels 0, 1 and 2.  The deblocking routines add to the variance totals
- * as they go, which is why the array is cleared first.
- */
-static void DeblockFrame(PB_INSTANCE *pbi, unsigned char *SourceBuffer,
-                  unsigned char *DestinationBuffer){
-
-  memset(pbi->FragmentVariances, 0 , sizeof(ogg_int32_t) * pbi->UnitFragments);
-
-
-  UpdateFragQIndex(pbi);
-
-  /* Y */
-  DeblockPlane( pbi, SourceBuffer, DestinationBuffer, 0);
-
-  /* U */
-  DeblockPlane( pbi, SourceBuffer, DestinationBuffer, 1);
-
-  /* V */
-  DeblockPlane( pbi, SourceBuffer, DestinationBuffer, 2);
-
-}
-/* Run the post-processing chain selected by pbi->PostProcessingLevel.
- *
- *   0            nothing is done
- *   1            only UpdateFragQIndex()
- *   4, 8         DeblockFrame() from LastFrameRecon into PostProcessBuffer
- *   5, 6, other  DeblockFrame(), then UpdateUMVBorder() and an in-place
- *                DeringFrame() on PostProcessBuffer
- *
- * Cases 5, 6 and default contain identical statements; DeringFrame() itself
- * tests PostProcessingLevel > 5 when choosing its strongest treatment.
- */
-void PostProcess(PB_INSTANCE *pbi){
-
-  switch (pbi->PostProcessingLevel){
-  case 8:
-    /* on a slow machine, use a simpler and faster deblocking filter */
-    DeblockFrame(pbi, pbi->LastFrameRecon,pbi->PostProcessBuffer);
-    break;
-
-  case 6:
-    DeblockFrame(pbi, pbi->LastFrameRecon,pbi->PostProcessBuffer);
-    UpdateUMVBorder(pbi, pbi->PostProcessBuffer );
-    DeringFrame(pbi, pbi->PostProcessBuffer, pbi->PostProcessBuffer);
-    break;
-
-  case 5:
-    DeblockFrame(pbi, pbi->LastFrameRecon,pbi->PostProcessBuffer);
-    UpdateUMVBorder(pbi, pbi->PostProcessBuffer );
-    DeringFrame(pbi, pbi->PostProcessBuffer, pbi->PostProcessBuffer);
-    break;
-  case 4:
-    DeblockFrame(pbi, pbi->LastFrameRecon, pbi->PostProcessBuffer);
-    break;
-  case 1:
-    UpdateFragQIndex(pbi);
-    break;
-
-  case 0:
-    break;
-
-  default:
-    DeblockFrame(pbi, pbi->LastFrameRecon, pbi->PostProcessBuffer);
-    UpdateUMVBorder(pbi, pbi->PostProcessBuffer );
-    DeringFrame(pbi, pbi->PostProcessBuffer, pbi->PostProcessBuffer);
-    break;
-  }
-}
-

+ 0 - 48
Engine/lib/libtheora/lib/enc/pp.h

@@ -1,48 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: pp.h 13884 2007-09-22 08:38:10Z giles $
-
- ********************************************************************/
-/* NOTE(review): this header has no include guard, and scan.c defines most
- * of the same names again with the same values. */
-/* Constants: dimensions of one block, in pixels. */
-#define INTERNAL_BLOCK_HEIGHT   8
-#define INTERNAL_BLOCK_WIDTH    8
-
-
-/* NEW Line search values. */
-#define UP      0
-#define DOWN    1
-#define LEFT    2
-#define RIGHT   3
-
-#define FIRST_ROW           0
-#define NOT_EDGE_ROW        1
-#define LAST_ROW            2
-/* Row counts expressed in block heights: the first three are 24, the last 32 */
-#define YDIFF_CB_ROWS                   (INTERNAL_BLOCK_HEIGHT * 3)
-#define CHLOCALS_CB_ROWS                (INTERNAL_BLOCK_HEIGHT * 3)
-#define PMAP_CB_ROWS                    (INTERNAL_BLOCK_HEIGHT * 3)
-#define PSCORE_CB_ROWS                  (INTERNAL_BLOCK_HEIGHT * 4)
-
-/* Status values in block coding map.  BLOCK_CODED_SGC and BLOCK_CODED_LOW
-   share the value 4, so the two cannot be told apart once stored. */
-#define CANDIDATE_BLOCK_LOW                     -2
-#define CANDIDATE_BLOCK                         -1
-#define BLOCK_NOT_CODED                         0
-#define BLOCK_CODED_BAR                         3
-#define BLOCK_CODED_SGC                         4
-#define BLOCK_CODED_LOW                         4
-#define BLOCK_CODED                             5
-
-#define MAX_PREV_FRAMES             16
-#define MAX_SEARCH_LINE_LEN                     7

+ 0 - 43
Engine/lib/libtheora/lib/enc/quant_lookup.h

@@ -1,43 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: quant_lookup.h 13884 2007-09-22 08:38:10Z giles $
-
- ********************************************************************/
-
-#include "codec_internal.h"
-/* NOTE(review): despite its name MIN16 evaluates to 65535; SHIFT16 is 65536. */
-#define MIN16 ((1<<16)-1)
-#define SHIFT16 (1<<16)
-
-#define MIN_LEGAL_QUANT_ENTRY 8
-#define MIN_DEQUANT_VAL       2
-#define IDCT_SCALE_FACTOR     2 /* Shift left bits to improve IDCT precision */
-#define OLD_SCHEME            1
-
-
-/******************************
- * lookup table for DCT coefficient zig-zag ordering
- *
- * Entry k holds the row-major position (row*8 + column) inside an 8x8
- * block of the k-th coefficient in zig-zag order.  Because the table is
- * 'static const' in a header, every file that includes it gets its own
- * copy.
- * ****************************/
-
-static const ogg_uint32_t dezigzag_index[64] = {
-  0,  1,  8,  16,  9,  2,  3, 10,
-  17, 24, 32, 25, 18, 11,  4,  5,
-  12, 19, 26, 33, 40, 48, 41, 34,
-  27, 20, 13,  6,  7, 14, 21, 28,
-  35, 42, 49, 56, 57, 50, 43, 36,
-  29, 22, 15, 23, 30, 37, 44, 51,
-  58, 59, 52, 45, 38, 31, 39, 46,
-  53, 60, 61, 54, 47, 55, 62, 63
-};
-

+ 0 - 110
Engine/lib/libtheora/lib/enc/reconstruct.c

@@ -1,110 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: reconstruct.c 13884 2007-09-22 08:38:10Z giles $
-
- ********************************************************************/
-
-#include "codec_internal.h"
-/* Plain C copy of an 8x8 block of bytes.
- *
- * src:    top-left byte of the block to read.
- * dest:   top-left byte of the block to write.
- * stride: distance between successive rows; the same value is applied to
- *         both src and dest.
- *
- * Each row is moved as two ogg_uint32_t stores.
- * NOTE(review): the pointer casts assume ogg_uint32_t is 4 bytes wide and
- * that src/dest are acceptable addresses for such an access; they also step
- * around C's aliasing rules.  An 8-byte memcpy per row would avoid both.
- */
-static void copy8x8__c (unsigned char *src,
-                  unsigned char *dest,
-                  unsigned int stride)
-{
-  int j;
-  for ( j = 0; j < 8; j++ ){
-    ((ogg_uint32_t*)dest)[0] = ((ogg_uint32_t*)src)[0];
-    ((ogg_uint32_t*)dest)[1] = ((ogg_uint32_t*)src)[1];
-    src+=stride;
-    dest+=stride;
-  }
-}
-/* Rebuild an intra-coded 8x8 block: every output pixel is
- * clamp255(residual + 128).
- *
- * ReconPtr:  top-left pixel of the output block; rows are LineStep apart.
- * ChangePtr: 64 residual values stored as 8 consecutive rows of 8.
- * LineStep:  distance between successive output rows.
- *
- * clamp255 is defined outside this file; presumably it saturates its
- * argument to 0..255 -- confirm against its definition.
- */
-static void recon_intra8x8__c (unsigned char *ReconPtr, ogg_int16_t *ChangePtr,
-                               ogg_uint32_t LineStep)
-{
-  ogg_uint32_t i;
-
-  for (i = 8; i; i--){
-    /* Convert the data back to 8 bit unsigned */
-    /* Saturate the output to unsigned 8 bit values */
-    ReconPtr[0] = clamp255( ChangePtr[0] + 128 );
-    ReconPtr[1] = clamp255( ChangePtr[1] + 128 );
-    ReconPtr[2] = clamp255( ChangePtr[2] + 128 );
-    ReconPtr[3] = clamp255( ChangePtr[3] + 128 );
-    ReconPtr[4] = clamp255( ChangePtr[4] + 128 );
-    ReconPtr[5] = clamp255( ChangePtr[5] + 128 );
-    ReconPtr[6] = clamp255( ChangePtr[6] + 128 );
-    ReconPtr[7] = clamp255( ChangePtr[7] + 128 );
-
-    ReconPtr += LineStep;
-    ChangePtr += 8;
-  }
-}
-/* Rebuild an inter-coded 8x8 block from one reference block: every output
- * pixel is clamp255(reference + residual).
- *
- * ReconPtr:  top-left pixel of the output block.
- * RefPtr:    top-left pixel of the reference block.
- * ChangePtr: 64 residual values stored as 8 consecutive rows of 8.
- * LineStep:  row distance, applied to both ReconPtr and RefPtr.
- */
-static void recon_inter8x8__c (unsigned char *ReconPtr, unsigned char *RefPtr,
-          ogg_int16_t *ChangePtr, ogg_uint32_t LineStep)
-{
-  ogg_uint32_t i;
-
-  for (i = 8; i; i--){
-    ReconPtr[0] = clamp255(RefPtr[0] + ChangePtr[0]);
-    ReconPtr[1] = clamp255(RefPtr[1] + ChangePtr[1]);
-    ReconPtr[2] = clamp255(RefPtr[2] + ChangePtr[2]);
-    ReconPtr[3] = clamp255(RefPtr[3] + ChangePtr[3]);
-    ReconPtr[4] = clamp255(RefPtr[4] + ChangePtr[4]);
-    ReconPtr[5] = clamp255(RefPtr[5] + ChangePtr[5]);
-    ReconPtr[6] = clamp255(RefPtr[6] + ChangePtr[6]);
-    ReconPtr[7] = clamp255(RefPtr[7] + ChangePtr[7]);
-
-    ChangePtr += 8;
-    ReconPtr += LineStep;
-    RefPtr += LineStep;
-  }
-}
-/* Rebuild an inter-coded 8x8 block from the average of two reference
- * blocks: every output pixel is clamp255(((ref1 + ref2) >> 1) + residual).
- * The >> 1 drops the low bit of the sum, so the average rounds down.
- *
- * ReconPtr:         top-left pixel of the output block.
- * RefPtr1, RefPtr2: top-left pixels of the two reference blocks.
- * ChangePtr:        64 residual values stored as 8 consecutive rows of 8.
- * LineStep:         row distance, applied to ReconPtr and both references.
- */
-static void recon_inter8x8_half__c (unsigned char *ReconPtr, unsigned char *RefPtr1,
-               unsigned char *RefPtr2, ogg_int16_t *ChangePtr,
-         ogg_uint32_t LineStep)
-{
-  ogg_uint32_t  i;
-
-  for (i = 8; i; i--){
-    ReconPtr[0] = clamp255((((int)RefPtr1[0] + (int)RefPtr2[0]) >> 1) + ChangePtr[0] );
-    ReconPtr[1] = clamp255((((int)RefPtr1[1] + (int)RefPtr2[1]) >> 1) + ChangePtr[1] );
-    ReconPtr[2] = clamp255((((int)RefPtr1[2] + (int)RefPtr2[2]) >> 1) + ChangePtr[2] );
-    ReconPtr[3] = clamp255((((int)RefPtr1[3] + (int)RefPtr2[3]) >> 1) + ChangePtr[3] );
-    ReconPtr[4] = clamp255((((int)RefPtr1[4] + (int)RefPtr2[4]) >> 1) + ChangePtr[4] );
-    ReconPtr[5] = clamp255((((int)RefPtr1[5] + (int)RefPtr2[5]) >> 1) + ChangePtr[5] );
-    ReconPtr[6] = clamp255((((int)RefPtr1[6] + (int)RefPtr2[6]) >> 1) + ChangePtr[6] );
-    ReconPtr[7] = clamp255((((int)RefPtr1[7] + (int)RefPtr2[7]) >> 1) + ChangePtr[7] );
-
-    ChangePtr += 8;
-    ReconPtr += LineStep;
-    RefPtr1 += LineStep;
-    RefPtr2 += LineStep;
-  }
-}
-/* Install the portable C reconstruction routines into a DspFunctions table.
- *
- * funcs:     table whose copy8x8, recon_intra8x8, recon_inter8x8 and
- *            recon_inter8x8_half members are assigned.
- * cpu_flags: CPU feature bits.  Only consulted when USE_ASM is defined: if
- *            OC_CPU_X86_MMX is set, dsp_mmx_recon_init(funcs) is called
- *            after the C defaults are in place (presumably to replace some
- *            of them -- that routine is not visible here).
- */
-void dsp_recon_init (DspFunctions *funcs, ogg_uint32_t cpu_flags)
-{
-  funcs->copy8x8 = copy8x8__c;
-  funcs->recon_intra8x8 = recon_intra8x8__c;
-  funcs->recon_inter8x8 = recon_inter8x8__c;
-  funcs->recon_inter8x8_half = recon_inter8x8_half__c;
-#if defined(USE_ASM)
-  if (cpu_flags & OC_CPU_X86_MMX) {
-    dsp_mmx_recon_init(funcs);
-  }
-#endif
-}

+ 0 - 2301
Engine/lib/libtheora/lib/enc/scan.c

@@ -1,2301 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: scan.c 13884 2007-09-22 08:38:10Z giles $
-
- ********************************************************************/
-
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-#include "codec_internal.h"
-#include "dsp.h"
-/* Longest line length handled; also the last valid index of LineLengthScores[] */
-#define MAX_SEARCH_LINE_LEN                   7
-/* SET8_x(ptr): store the byte value x into the 8 bytes at ptr, written as two
-   ogg_uint32_t words.  NOTE(review): each macro expands to two separate
-   statements and does not parenthesise ptr, so it is unsafe as the body of
-   an unbraced if/else and with a non-trivial ptr expression. */
-#define SET8_0(ptr) \
-  ((ogg_uint32_t *)ptr)[0] = 0x00000000; \
-  ((ogg_uint32_t *)ptr)[1] = 0x00000000;
-#define SET8_1(ptr) \
-  ((ogg_uint32_t *)ptr)[0] = 0x01010101; \
-  ((ogg_uint32_t *)ptr)[1] = 0x01010101;
-#define SET8_8(ptr) \
-  ((ogg_uint32_t *)ptr)[0] = 0x08080808; \
-  ((ogg_uint32_t *)ptr)[1] = 0x08080808;
-/* Score per line length 0..MAX_SEARCH_LINE_LEN; InitScanMapArrays() compares entries with PrimaryBlockThreshold */
-static ogg_uint32_t LineLengthScores[ MAX_SEARCH_LINE_LEN + 1 ] = {
-  0, 0, 0, 0, 2, 4, 12, 24
-};
-
-static ogg_uint32_t BodyNeighbourScore = 8;
-static double DiffDevisor = 0.0625;   /* 1/16; InitScanMapArrays() multiplies by it */
-#define HISTORY_BLOCK_FACTOR    2
-#define MIN_STEP_THRESH 6             /* floor applied to StepThresh in InitScanMapArrays() */
-#define SCORE_MULT_LOW    0.5         /* lower clamp of the score multiplier table */
-#define SCORE_MULT_HIGH   4           /* upper clamp of the score multiplier table */
-
-#define UP      0
-#define DOWN    1
-#define LEFT    2
-#define RIGHT   3
-
-#define INTERNAL_BLOCK_HEIGHT   8
-#define INTERNAL_BLOCK_WIDTH    8
-/* Block status codes; BLOCK_CODED_SGC and BLOCK_CODED_LOW are both 4 */
-#define BLOCK_NOT_CODED                       0
-#define BLOCK_CODED_BAR                       3
-#define BLOCK_CODED_SGC                       4
-#define BLOCK_CODED_LOW                       4
-#define BLOCK_CODED                           5
-
-#define CANDIDATE_BLOCK_LOW                  -2
-#define CANDIDATE_BLOCK                      -1
-
-#define FIRST_ROW           0
-#define NOT_EDGE_ROW        1
-#define LAST_ROW            2
-
-#define YDIFF_CB_ROWS                   (INTERNAL_BLOCK_HEIGHT * 3)
-#define CHLOCALS_CB_ROWS                (INTERNAL_BLOCK_HEIGHT * 3)
-#define PMAP_CB_ROWS                    (INTERNAL_BLOCK_HEIGHT * 3)
-/* Load one of the fixed threshold presets into a PP_INSTANCE.
- *
- * ppi:   instance whose SRFGreyThresh, SRFColThresh, NoiseSupLevel,
- *        SgcLevelThresh, SuvcLevelThresh, GrpLowSadThresh,
- *        GrpHighSadThresh, PrimaryBlockThreshold, SgcThresh and PAKEnabled
- *        members are overwritten.
- * Level: preset selector.  0..6 each have their own table; every other
- *        value takes the default table.
- *
- * Things worth knowing when comparing presets: level 0 is the only one that
- * clears PAKEnabled; levels 5 and 6 differ only in SRFColThresh (6 vs 7);
- * the default table differs from level 2 only in GrpLowSadThresh (10 vs 8).
- */
-void ConfigurePP( PP_INSTANCE *ppi, int Level ) {
-  switch ( Level ){
-  case 0:
-    ppi->SRFGreyThresh = 1;
-    ppi->SRFColThresh = 1;
-    ppi->NoiseSupLevel = 2;
-    ppi->SgcLevelThresh = 1;
-    ppi->SuvcLevelThresh = 1;
-    ppi->GrpLowSadThresh = 6;
-    ppi->GrpHighSadThresh = 24;
-    ppi->PrimaryBlockThreshold = 2;
-    ppi->SgcThresh = 10;
-
-    ppi->PAKEnabled = 0;
-    break;
-
-  case 1:
-    ppi->SRFGreyThresh = 2;
-    ppi->SRFColThresh = 2;
-    ppi->NoiseSupLevel = 2;
-    ppi->SgcLevelThresh = 2;
-    ppi->SuvcLevelThresh = 2;
-    ppi->GrpLowSadThresh = 8;
-    ppi->GrpHighSadThresh = 32;
-    ppi->PrimaryBlockThreshold = 5;
-    ppi->SgcThresh = 12;
-
-    ppi->PAKEnabled = 1;
-    break;
-
-  case 2: /* Default VP3 settings */
-    ppi->SRFGreyThresh = 3;
-    ppi->SRFColThresh = 3;
-    ppi->NoiseSupLevel = 2;
-    ppi->SgcLevelThresh = 2;
-    ppi->SuvcLevelThresh = 2;
-    ppi->GrpLowSadThresh = 8;
-    ppi->GrpHighSadThresh = 32;
-    ppi->PrimaryBlockThreshold = 5;
-    ppi->SgcThresh = 16;
-
-    ppi->PAKEnabled = 1;
-    break;
-
-  case 3:
-    ppi->SRFGreyThresh = 4;
-    ppi->SRFColThresh = 4;
-    ppi->NoiseSupLevel = 3;
-    ppi->SgcLevelThresh = 3;
-    ppi->SuvcLevelThresh = 3;
-    ppi->GrpLowSadThresh = 10;
-    ppi->GrpHighSadThresh = 48;
-    ppi->PrimaryBlockThreshold = 5;
-    ppi->SgcThresh = 18;
-
-    ppi->PAKEnabled = 1;
-    break;
-
-  case 4:
-    ppi->SRFGreyThresh = 5;
-    ppi->SRFColThresh = 5;
-    ppi->NoiseSupLevel = 3;
-    ppi->SgcLevelThresh = 4;
-    ppi->SuvcLevelThresh = 4;
-    ppi->GrpLowSadThresh = 12;
-    ppi->GrpHighSadThresh = 48;
-    ppi->PrimaryBlockThreshold = 5;
-    ppi->SgcThresh = 20;
-
-    ppi->PAKEnabled = 1;
-    break;
-
-  case 5:
-    ppi->SRFGreyThresh = 6;
-    ppi->SRFColThresh = 6;
-    ppi->NoiseSupLevel = 3;
-    ppi->SgcLevelThresh = 4;
-    ppi->SuvcLevelThresh = 4;
-    ppi->GrpLowSadThresh = 12;
-    ppi->GrpHighSadThresh = 64;
-    ppi->PrimaryBlockThreshold = 10;
-    ppi->SgcThresh = 24;
-
-    ppi->PAKEnabled = 1;
-    break;
-
-  case 6:
-    ppi->SRFGreyThresh = 6;
-    ppi->SRFColThresh = 7;
-    ppi->NoiseSupLevel = 3;
-    ppi->SgcLevelThresh = 4;
-    ppi->SuvcLevelThresh = 4;
-    ppi->GrpLowSadThresh = 12;
-    ppi->GrpHighSadThresh = 64;
-    ppi->PrimaryBlockThreshold = 10;
-    ppi->SgcThresh = 24;
-
-    ppi->PAKEnabled = 1;
-    break;
-
-  default:
-    ppi->SRFGreyThresh = 3;
-    ppi->SRFColThresh = 3;
-    ppi->NoiseSupLevel = 2;
-    ppi->SgcLevelThresh = 2;
-    ppi->SuvcLevelThresh = 2;
-    ppi->GrpLowSadThresh = 10;
-    ppi->GrpHighSadThresh = 32;
-    ppi->PrimaryBlockThreshold = 5;
-    ppi->SgcThresh = 16;
-    ppi->PAKEnabled = 1;
-    break;
-  }
-}
-/* Fill ppi->ScanPixelIndexTable[] with the pixel offset of the top-left
- * corner of every fragment.
- *
- * Y fragments come first: (fragment row) * VFRAGPIXELS * frame width plus
- * (fragment column) * HFRAGPIXELS.  The 2 * ScanUVPlaneFragments chroma
- * entries follow; they are computed in a single loop with half the fragment
- * columns and half the frame width, and are offset by ppi->YFramePixels.
- */
-static void ScanCalcPixelIndexTable(PP_INSTANCE *ppi){
-  ogg_uint32_t i;
-  ogg_uint32_t * PixelIndexTablePtr = ppi->ScanPixelIndexTable;
-
-  /* Y plane entries (the U and V indices are added by the second loop). */
-  for ( i = 0; i < (ppi->ScanYPlaneFragments); i++ ) {
-    PixelIndexTablePtr[ i ] =
-      ((i / ppi->ScanHFragments) *
-       VFRAGPIXELS * ppi->ScanConfig.VideoFrameWidth);
-    PixelIndexTablePtr[ i ] +=
-      ((i % ppi->ScanHFragments) * HFRAGPIXELS);
-  }
-  /* chroma entries are stored directly after the Y entries */
-  PixelIndexTablePtr = &ppi->ScanPixelIndexTable[ppi->ScanYPlaneFragments];
-
-  for ( i = 0; i < (ppi->ScanUVPlaneFragments * 2); i++ ){
-    PixelIndexTablePtr[ i ] =
-      ((i / (ppi->ScanHFragments >> 1) ) *
-       (VFRAGPIXELS * (ppi->ScanConfig.VideoFrameWidth >> 1)) );
-    PixelIndexTablePtr[ i ] +=
-      ((i % (ppi->ScanHFragments >> 1) ) *
-       HFRAGPIXELS) + ppi->YFramePixels;
-    }
-}
-/* Reset the per-frame scan maps of a PP_INSTANCE and rebuild the lookup
- * tables and limits that depend on its current thresholds.
- *
- * Work done, in order:
- *  - FragScores, SameGreyDirPixels, FragDiffPixels and RowChangedPixels are
- *    zeroed and ScanDisplayFragments is filled with BLOCK_NOT_CODED;
- *  - AbsDiff_ScoreMultiplierTable[i] = i * DiffDevisor, clamped to
- *    SCORE_MULT_LOW..SCORE_MULT_HIGH, for i = 0..255;
- *  - NoiseScoreBoostTable[i] = 0..3 according to how far i lies above
- *    SrfThresh, measured in multiples of StepThresh;
- *  - LineSearchTripTresh and MaxLineSearchLen are derived from
- *    PrimaryBlockThreshold.
- *
- * NOTE(review): three of the memsets pass ScanFrameFragments as a byte
- * count, which assumes those arrays have 1-byte elements -- confirm against
- * the PP_INSTANCE declaration.
- */
-static void InitScanMapArrays(PP_INSTANCE *ppi){
-  int i;
-  unsigned char StepThresh;
-
-  /* Clear down the fragment level map arrays for the current frame. */
-  memset( ppi->FragScores, 0,
-          ppi->ScanFrameFragments * sizeof(*ppi->FragScores) );
-  memset( ppi->SameGreyDirPixels, 0,
-          ppi->ScanFrameFragments );
-  memset( ppi->FragDiffPixels, 0,
-          ppi->ScanFrameFragments );
-  memset( ppi->RowChangedPixels, 0,
-          3* ppi->ScanConfig.VideoFrameHeight*sizeof(*ppi->RowChangedPixels));
-
-  memset( ppi->ScanDisplayFragments, BLOCK_NOT_CODED, ppi->ScanFrameFragments);
-
-  /* Threshold used in setting up ppi->NoiseScoreBoostTable[]: half of
-     SRFGreyThresh, but never less than MIN_STEP_THRESH */
-  StepThresh = (unsigned int)(ppi->SRFGreyThresh >> 1);
-  if ( StepThresh < MIN_STEP_THRESH )
-    StepThresh = MIN_STEP_THRESH;
-  ppi->SrfThresh = (int)ppi->SRFGreyThresh;
-
-  /* Set up various tables used to tweak pixel score values and
-     scoring rules based upon absolute value of a pixel change */
-  for ( i = 0; i < 256; i++ ){
-    /* Score multiplier table indexed by absolute difference. */
-    ppi->AbsDiff_ScoreMultiplierTable[i] = (double)i * DiffDevisor;
-    if ( ppi->AbsDiff_ScoreMultiplierTable[i] < SCORE_MULT_LOW )
-      ppi->AbsDiff_ScoreMultiplierTable[i] = SCORE_MULT_LOW;
-    else if ( ppi->AbsDiff_ScoreMultiplierTable[i] > SCORE_MULT_HIGH)
-      ppi->AbsDiff_ScoreMultiplierTable[i] = SCORE_MULT_HIGH;
-
-    /* Table that facilitates a relaxation of the changed locals rules
-       in NoiseScoreRow() for pixels that have changed by a large
-       amount. */
-    if ( i < (ppi->SrfThresh + StepThresh) )
-      ppi->NoiseScoreBoostTable[i] = 0;
-    else if ( i < (ppi->SrfThresh + (StepThresh * 4)) )
-      ppi->NoiseScoreBoostTable[i] = 1;
-    else if ( i < (ppi->SrfThresh + (StepThresh * 6)) )
-      ppi->NoiseScoreBoostTable[i] = 2;
-    else
-      ppi->NoiseScoreBoostTable[i] = 3;
-
-  }
-
-  /* Set various other threshold parameters. */
-
-  /* Set variables that control access to the line search algorithms:
-     16, or PrimaryBlockThreshold + 1 when that threshold is below 16. */
-  ppi->LineSearchTripTresh = 16;
-  if ( ppi->LineSearchTripTresh > ppi->PrimaryBlockThreshold )
-    ppi->LineSearchTripTresh = (unsigned int)(ppi->PrimaryBlockThreshold + 1);
-
-  /* Adjust line search length if block threshold low: shorten it while the
-     score of the next shorter length still exceeds PrimaryBlockThreshold */
-  ppi->MaxLineSearchLen = MAX_SEARCH_LINE_LEN;
-  while ( (ppi->MaxLineSearchLen > 0) &&
-          (LineLengthScores[ppi->MaxLineSearchLen-1] >
-           ppi->PrimaryBlockThreshold) )
-    ppi->MaxLineSearchLen -= 1;
-
-}
-
-/* Copies the caller's scan configuration into ppi, derives the plane and
-   fragment geometry from the frame dimensions, and then initialises the
-   frame info, pixel index table, block history lists and scan arrays. */
-void ScanYUVInit( PP_INSTANCE *  ppi, SCAN_CONFIG_DATA * ScanConfigPtr){
-  int Slot;
-
-  /* Frame dimensions: every size computed below is derived from these. */
-  ppi->ScanConfig.VideoFrameWidth = ScanConfigPtr->VideoFrameWidth;
-  ppi->ScanConfig.VideoFrameHeight = ScanConfigPtr->VideoFrameHeight;
-
-  /* Pointers handed in by the caller are stored as given. */
-  ppi->ScanConfig.Yuv0ptr = ScanConfigPtr->Yuv0ptr;
-  ppi->ScanConfig.Yuv1ptr = ScanConfigPtr->Yuv1ptr;
-  ppi->ScanConfig.SrfWorkSpcPtr = ScanConfigPtr->SrfWorkSpcPtr;
-  ppi->ScanConfig.disp_fragments = ScanConfigPtr->disp_fragments;
-  ppi->ScanConfig.RegionIndex = ScanConfigPtr->RegionIndex;
-
-  /* Each chroma plane is half the frame size in both directions. */
-  ppi->VideoUVPlaneWidth = ScanConfigPtr->VideoFrameWidth / 2;
-  ppi->VideoUVPlaneHeight = ScanConfigPtr->VideoFrameHeight / 2;
-
-  /* Pixel counts per plane. */
-  ppi->YFramePixels = ppi->ScanConfig.VideoFrameWidth *
-    ppi->ScanConfig.VideoFrameHeight;
-  ppi->UVFramePixels = ppi->VideoUVPlaneWidth * ppi->VideoUVPlaneHeight;
-
-  /* Fragment counts: per row, per column, per plane and for the whole
-     frame (one Y plane plus two UV planes). */
-  ppi->ScanHFragments = ppi->ScanConfig.VideoFrameWidth / HFRAGPIXELS;
-  ppi->ScanVFragments = ppi->ScanConfig.VideoFrameHeight / VFRAGPIXELS;
-  ppi->ScanYPlaneFragments = ppi->YFramePixels /
-    (HFRAGPIXELS * VFRAGPIXELS);
-  ppi->ScanUVPlaneFragments = ppi->UVFramePixels /
-    (HFRAGPIXELS * VFRAGPIXELS);
-  ppi->ScanFrameFragments = ppi->ScanYPlaneFragments +
-    (2 * ppi->ScanUVPlaneFragments);
-
-  PInitFrameInfo(ppi);
-
-  /* Build the scan pixel index table. */
-  ScanCalcPixelIndexTable(ppi);
-
-  /* Start with every history list reporting "not coded"... */
-  Slot = 0;
-  while ( Slot < MAX_PREV_FRAMES ){
-    memset( ppi->PrevFragments[Slot], BLOCK_NOT_CODED,
-            ppi->ScanFrameFragments);
-    Slot++;
-  }
-
-  /* ...except the last list in use, which is marked fully coded.
-     YUVAnalyseFrame() is not called for the first frame in a sequence
-     (a key frame), so this ensures that for the second frame all
-     blocks are marked for coding, in line with the behaviour after
-     other key frames. */
-  memset( ppi->PrevFragments[ppi->PrevFrameLimit-1],
-          BLOCK_CODED, ppi->ScanFrameFragments );
-
-  /* Initialise the scan arrays. */
-  InitScanMapArrays(ppi);
-}
-
-/* Rebuilds PrevFragments[0] as a summary of the history lists
-   PrevFragments[1 .. PrevFrameLimit-1]: a fragment is set to BLOCK_CODED
-   in list 0 when any of those lists holds a value above BLOCK_CODED_BAR
-   for it, and is left at BLOCK_NOT_CODED otherwise. */
-static void SetFromPrevious(PP_INSTANCE *ppi) {
-  unsigned int  Frag;
-  unsigned int  Age;
-
-  /* List 0 is the accumulator, so it is cleared before anything else. */
-  memset( ppi->PrevFragments[0], BLOCK_NOT_CODED, ppi->ScanFrameFragments );
-
-  /* With a limit of one or less there are no history lists to merge. */
-  if ( ppi->PrevFrameLimit <= 1 )
-    return;
-
-  for ( Frag = 0; Frag < ppi->ScanFrameFragments; Frag++ ){
-    /* Walk the history lists until one shows this fragment as coded. */
-    Age = 1;
-    while ( (Age < ppi->PrevFrameLimit) &&
-            (ppi->PrevFragments[Age][Frag] <= BLOCK_CODED_BAR) )
-      Age++;
-
-    /* The walk stopped early only if a coded entry was found. */
-    if ( Age < ppi->PrevFrameLimit )
-      ppi->PrevFragments[0][Frag] = BLOCK_CODED;
-  }
-}
-
-/* Ages the block history: each list PrevFragments[k-1] is copied into
-   PrevFragments[k] for k = PrevFrameLimit down to 2, and then the
-   current frame's ScanDisplayFragments is copied into PrevFragments[1]. */
-static void UpdatePreviousBlockLists(PP_INSTANCE *ppi) {
-  int  Slot = ppi->PrevFrameLimit;
-
-  /* Copy from the oldest slot downwards so no list is overwritten
-     before it has been moved along.
-     NOTE(review): the first copy writes PrevFragments[PrevFrameLimit];
-     confirm the array has room for that index. */
-  while ( Slot > 1 ){
-    memcpy( ppi->PrevFragments[Slot], ppi->PrevFragments[Slot-1],
-            ppi->ScanFrameFragments );
-    Slot--;
-  }
-
-  /* The newest history entry is this frame's block list. */
-  memcpy( ppi->PrevFragments[1], ppi->ScanDisplayFragments,
-          ppi->ScanFrameFragments );
-}
-
-/* Converts the internal per-fragment state into the external 0/1 update
-   map and refreshes two statistics held in ppi.
-
-   InternalFragmentsPtr  per-fragment state produced by the scan.
-   RecentHistoryPtr      per-fragment history summary; BLOCK_CODED entries
-                         are also flagged for update.
-   ExternalFragmentsPtr  output map: 1 = update the fragment, 0 = leave it.
-
-   On return ppi->OutputBlocksUpdated holds the number of fragments whose
-   internal state is above BLOCK_NOT_CODED, plus the history-only
-   fragments divided by HISTORY_BLOCK_FACTOR, and ppi->KFIndicator holds
-   the key frame candidate score. */
-static void CreateOutputDisplayMap( PP_INSTANCE *ppi,
-                                    signed char *InternalFragmentsPtr,
-                                    signed char *RecentHistoryPtr,
-                                    unsigned char *ExternalFragmentsPtr ) {
-  ogg_uint32_t i;
-  ogg_uint32_t HistoryBlocksAdded = 0;
-  ogg_uint32_t YBand =  (ppi->ScanYPlaneFragments/8);   /* 1/8th of Y image. */
-
-  ppi->OutputBlocksUpdated = 0;
-  for ( i = 0; i < ppi->ScanFrameFragments; i++ ) {
-    if ( InternalFragmentsPtr[i] > BLOCK_NOT_CODED ) {
-      ppi->OutputBlocksUpdated ++;
-      ExternalFragmentsPtr[i] = 1;
-    }else if ( RecentHistoryPtr[i] == BLOCK_CODED ){
-      HistoryBlocksAdded ++;
-      ExternalFragmentsPtr[i] = 1;
-    }else{
-      ExternalFragmentsPtr[i] = 0;
-    }
-  }
-
-  /* Add in a weighting for the history blocks that have been added. */
-  ppi->OutputBlocksUpdated += (HistoryBlocksAdded / HISTORY_BLOCK_FACTOR);
-
-  /* Now calculate a key frame candidate indicator.  This is based
-     upon Y data only and ignores the top and bottom 1/8 of the
-     image.  Also ignore history blocks and BAR blocks. */
-  ppi->KFIndicator = 0;
-  for ( i = YBand; i < (ppi->ScanYPlaneFragments - YBand); i++ )
-    if ( InternalFragmentsPtr[i] > BLOCK_CODED_BAR )
-      ppi->KFIndicator ++;
-
-  /* Scale the count against three quarters of the Y fragments.  That
-     divisor is zero when the Y plane holds fewer than two fragments;
-     dividing by it would be undefined, so in that case no score is
-     formed and the indicator is reported as 0. */
-  if ( ((ppi->ScanYPlaneFragments*3)/4) != 0 )
-    ppi->KFIndicator =
-      ((ppi->KFIndicator*100)/((ppi->ScanYPlaneFragments*3)/4));
-  else
-    ppi->KFIndicator = 0;
-}
-
-/* Scans one row of fragments, one pixel line at a time, comparing the
-   two source planes with dsp_row_sad8().  A fragment whose state is at
-   or below BLOCK_NOT_CODED is raised to CANDIDATE_BLOCK when a line's
-   score exceeds the low threshold, or to BLOCK_CODED when it exceeds
-   the high threshold.  Fragments already above BLOCK_NOT_CODED are not
-   measured again.
-
-   YuvPtr1, YuvPtr2  first pixel line of the fragment row in each plane.
-   DispFragPtr       state entries for the fragments of this row.
-
-   Returns 1 if any score exceeded the low threshold, otherwise 0. */
-static int RowSadScan( PP_INSTANCE *ppi,
-                       unsigned char * YuvPtr1,
-                       unsigned char * YuvPtr2,
-                       signed char *  DispFragPtr){
-  ogg_uint32_t   LowThresh = ppi->ModifiedGrpLowSadThresh;
-  ogg_uint32_t   HighThresh = ppi->ModifiedGrpHighSadThresh;
-  ogg_int32_t    Line;
-  ogg_int32_t    Frag;
-  ogg_uint32_t   Sad;
-  unsigned char *Row1;
-  unsigned char *Row2;
-  int            Found = 0;
-
-  for ( Line = 0; Line < VFRAGPIXELS; Line++ ){
-    Row1 = YuvPtr1;
-    Row2 = YuvPtr2;
-
-    /* Each fragment contributes a group of 8 pixels to this line. */
-    for ( Frag = 0; Frag < ppi->PlaneHFragments;
-          Frag++, Row1 += 8, Row2 += 8 ){
-      /* Already selected (possibly by an earlier line): skip. */
-      if ( DispFragPtr[Frag] > BLOCK_NOT_CODED )
-        continue;
-
-      Sad = dsp_row_sad8(ppi->dsp, Row1, Row2);
-      if ( Sad <= LowThresh )
-        continue;
-
-      /* Above the high threshold the update is forced; between the two
-         thresholds the fragment only becomes a candidate. */
-      if ( Sad > HighThresh )
-        DispFragPtr[Frag] = BLOCK_CODED;
-      else
-        DispFragPtr[Frag] = CANDIDATE_BLOCK;
-      Found = 1;
-    }
-
-    /* Down one pixel line in both planes. */
-    YuvPtr1 += ppi->PlaneStride;
-    YuvPtr2 += ppi->PlaneStride;
-  }
-
-  return Found;
-}
-
-/* Scans one row of fragments, scoring each fragment once with
-   dsp_col_sad8x8().  A fragment whose state is at or below
-   BLOCK_NOT_CODED is raised to CANDIDATE_BLOCK when the score exceeds
-   the low threshold, or to BLOCK_CODED when it exceeds the high
-   threshold.
-
-   YuvPtr1, YuvPtr2  first pixel line of the fragment row in each plane.
-   DispFragPtr       state entries for the fragments of this row.
-
-   Returns 1 if any score exceeded the low threshold, otherwise 0. */
-static int ColSadScan( PP_INSTANCE *ppi,
-                       unsigned char * YuvPtr1,
-                       unsigned char * YuvPtr2,
-                       signed char *  DispFragPtr ){
-  ogg_uint32_t    LowThresh = ppi->ModifiedGrpLowSadThresh;
-  ogg_uint32_t    HighThresh = ppi->ModifiedGrpHighSadThresh;
-  ogg_int32_t     Frag;
-  ogg_uint32_t    Sad;
-  int             Found = 0;
-
-  for ( Frag = 0; Frag < ppi->PlaneHFragments; Frag++ ){
-    /* Only fragments not yet marked for coding are measured. */
-    if ( *DispFragPtr <= BLOCK_NOT_CODED ){
-      Sad = dsp_col_sad8x8(ppi->dsp, YuvPtr1, YuvPtr2, ppi->PlaneStride );
-
-      if ( Sad > LowThresh ){
-        /* Force the update above the high threshold, otherwise note
-           the fragment as a candidate. */
-        if ( Sad > HighThresh )
-          *DispFragPtr = BLOCK_CODED;
-        else
-          *DispFragPtr = CANDIDATE_BLOCK;
-        Found = 1;
-      }
-    }
-
-    /* Move on to the next fragment's state entry and pixels. */
-    DispFragPtr++;
-    YuvPtr1 += HFRAGPIXELS;
-    YuvPtr2 += HFRAGPIXELS;
-  }
-
-  return Found;
-}
-
-/* Reports whether any neighbour of *FragPtr is marked BLOCK_CODED.
-   Neighbours are the entries at offsets -1/+1 and at -RowLen/+RowLen
-   (each with its own -1/+1); the Has* flags say which of those sides
-   exist, and only existing sides are read. */
-static int SadPass2CodedNeighbour( const signed char * FragPtr,
-                                   ogg_int32_t RowLen,
-                                   int HasLeft, int HasRight,
-                                   int HasAbove, int HasBelow ){
-  if ( HasLeft && (FragPtr[-1] == BLOCK_CODED) )
-    return 1;
-  if ( HasRight && (FragPtr[1] == BLOCK_CODED) )
-    return 1;
-
-  if ( HasAbove ){
-    if ( FragPtr[-RowLen] == BLOCK_CODED )
-      return 1;
-    if ( HasLeft && (FragPtr[-RowLen-1] == BLOCK_CODED) )
-      return 1;
-    if ( HasRight && (FragPtr[-RowLen+1] == BLOCK_CODED) )
-      return 1;
-  }
-
-  if ( HasBelow ){
-    if ( FragPtr[RowLen] == BLOCK_CODED )
-      return 1;
-    if ( HasLeft && (FragPtr[RowLen-1] == BLOCK_CODED) )
-      return 1;
-    if ( HasRight && (FragPtr[RowLen+1] == BLOCK_CODED) )
-      return 1;
-  }
-
-  return 0;
-}
-
-/* Second SAD pass over one row of fragments: every CANDIDATE_BLOCK that
-   has at least one BLOCK_CODED neighbour is promoted to BLOCK_CODED_LOW.
-
-   RowNumber    index of this fragment row within the plane; row 0 has
-                no row above, and rows at or beyond PlaneVFragments-1
-                have no row below.
-   DispFragPtr  state entries of this row; the rows before and after it
-                are reached at -PlaneHFragments and +PlaneHFragments.
-
-   Results are staged in ppi->TmpCodedMap and copied back at the end, so
-   a promotion made here never influences another fragment of the same
-   row.
-
-   Compared with the earlier three-way copy of this logic, a row that is
-   one fragment wide no longer reads a "right" or "left" neighbour that
-   does not exist, a plane with a single fragment row no longer reads a
-   row below row 0, and a non-positive row length returns early instead
-   of reaching memcpy(). */
-static void SadPass2( PP_INSTANCE *ppi,
-                      ogg_int32_t RowNumber,
-                      signed char *  DispFragPtr ){
-  ogg_int32_t  i;
-  ogg_int32_t  RowLen = ppi->PlaneHFragments;
-  int          HasAbove = ( RowNumber != 0 );
-  int          HasBelow = ( RowNumber < (ppi->PlaneVFragments - 1) );
-
-  if ( RowLen <= 0 )
-    return;
-
-  for ( i = 0; i < RowLen; i++ ){
-    if ( (DispFragPtr[i] == CANDIDATE_BLOCK) &&
-         SadPass2CodedNeighbour( &DispFragPtr[i], RowLen,
-                                 (i > 0), (i < RowLen-1),
-                                 HasAbove, HasBelow ) ){
-      ppi->TmpCodedMap[i] =  BLOCK_CODED_LOW;
-    }else{
-      ppi->TmpCodedMap[i] = DispFragPtr[i];
-    }
-  }
-
-  /* Now copy back the modified fragment data. */
-  memcpy( &DispFragPtr[0], &ppi->TmpCodedMap[0], (ppi->PlaneHFragments) );
-}
-
-/* Returns the sum of the eight pixels surrounding *SrcPtr, shifted
-   right by 3.  The centre pixel itself is not included.  The lines
-   before and after are reached with ppi->PlaneStride (the stride, not
-   the width). */
-static unsigned char ApplyPakLowPass( PP_INSTANCE *ppi,
-                                      unsigned char * SrcPtr ){
-  unsigned char * Above = SrcPtr - 1 - ppi->PlaneStride;
-  unsigned char * Level = SrcPtr - 1;
-  unsigned char * Below = SrcPtr - 1 + ppi->PlaneStride;
-  ogg_uint32_t    Sum;
-
-  /* Three pixels from the line before... */
-  Sum  = (ogg_uint32_t)Above[0];
-  Sum += (ogg_uint32_t)Above[1];
-  Sum += (ogg_uint32_t)Above[2];
-
-  /* ...the two either side of the centre... */
-  Sum += (ogg_uint32_t)Level[0];
-  Sum += (ogg_uint32_t)Level[2];
-
-  /* ...and three from the line after. */
-  Sum += (ogg_uint32_t)Below[0];
-  Sum += (ogg_uint32_t)Below[1];
-  Sum += (ogg_uint32_t)Below[2];
-
-  return (unsigned char)( Sum >> 3 );
-}
-
-/* Processes the 8 pixels that one fragment contributes to the current
-   pixel line.  All pointers address this fragment's own entries.
-
-   For a CANDIDATE_BLOCK the per-pixel difference YuvPtr1[j]-YuvPtr2[j]
-   is stored in YUVDiffsPtr, its SgcThreshTable entry is accumulated
-   into *SgcPtr, and its SrfThreshTable entry is stored in bits_map_ptr
-   and counted.  For pixel columns j with PakFirst <= j < PakLimit, a
-   difference flagged by SrfPakThreshTable is replaced by the difference
-   of the two ApplyPakLowPass() results before the SRF test.  Passing
-   PakFirst == PakLimit disables that step.
-
-   For any other state the pixels are not examined: a fragment above
-   BLOCK_NOT_CODED has all its pixels marked as changed, otherwise only
-   its changed-locals entries are cleared.
-
-   The changed pixel count is added to *RowDiffsPtr and *FDiffPixels,
-   and a count of 7 or more marks the fragment BLOCK_CODED_LOW. */
-static void RowDiffScanFragment( PP_INSTANCE *ppi,
-                                 unsigned char * YuvPtr1,
-                                 unsigned char * YuvPtr2,
-                                 ogg_int16_t   * YUVDiffsPtr,
-                                 unsigned char * bits_map_ptr,
-                                 signed char   * SgcPtr,
-                                 signed char   * DispFragPtr,
-                                 unsigned char * FDiffPixels,
-                                 ogg_int32_t   * RowDiffsPtr,
-                                 unsigned char * ChLocalsPtr,
-                                 ogg_int32_t     PakFirst,
-                                 ogg_int32_t     PakLimit ){
-  ogg_int32_t    j;
-  ogg_int32_t    FragChangedPixels = 0;
-  ogg_int16_t    Diff;     /* Temp local workspace. */
-
-  /* Test for break out conditions to save time. */
-  if ( *DispFragPtr == CANDIDATE_BLOCK ){
-    /* Clear down entries in changed locals array */
-    SET8_0(ChLocalsPtr);
-
-    for ( j = 0; j < HFRAGPIXELS; j++ ){
-      /* Take a local copy of the measured difference. */
-      Diff = (int)YuvPtr1[j] - (int)YuvPtr2[j];
-
-      /* Store the actual difference value */
-      YUVDiffsPtr[j] = Diff;
-
-      /* Test against the Level thresholds and record the results */
-      SgcPtr[0] += ppi->SgcThreshTable[Diff+255];
-
-      /* Re-measure on low-pass filtered pixels where permitted. */
-      if ( (j >= PakFirst) && (j < PakLimit) &&
-           ppi->SrfPakThreshTable[Diff+255] )
-        Diff = (int)ApplyPakLowPass( ppi, &YuvPtr1[j] ) -
-          (int)ApplyPakLowPass( ppi, &YuvPtr2[j] );
-
-      /* Test against the SRF thresholds */
-      bits_map_ptr[j] = ppi->SrfThreshTable[Diff+255];
-      FragChangedPixels += ppi->SrfThreshTable[Diff+255];
-    }
-  }else{
-    /* If we are breaking out here mark all pixels as changed. */
-    if ( *DispFragPtr > BLOCK_NOT_CODED ){
-      SET8_1(bits_map_ptr);
-      SET8_8(ChLocalsPtr);
-    }else{
-      SET8_0(ChLocalsPtr);
-    }
-  }
-
-  *RowDiffsPtr += FragChangedPixels;
-  *FDiffPixels += (unsigned char)FragChangedPixels;
-
-  /* If we have a lot of changed pixels for this fragment on this row
-     then the fragment is almost sure to be picked (e.g. through the
-     line search) so we can mark it as selected and then ignore it. */
-  if ( FragChangedPixels >= 7 ){
-    *DispFragPtr = BLOCK_CODED_LOW;
-  }
-}
-
-/* Runs the pixel difference scan over one pixel line of a plane,
-   fragment by fragment (see RowDiffScanFragment() for the per-fragment
-   work and outputs).
-
-   YuvPtr1, YuvPtr2  the pixel line in the two planes being compared.
-   YUVDiffsPtr       receives the raw per-pixel differences.
-   bits_map_ptr      receives the per-pixel changed flags.
-   SgcPtr            per-fragment Sgc accumulators.
-   DispFragPtr       per-fragment state entries.
-   FDiffPixels       per-fragment changed pixel counters.
-   RowDiffsPtr       changed pixel counter for the whole line.
-   ChLocalsPtr       per-pixel changed-locals entries.
-   EdgeRow           non-zero disables the low-pass (PAK) kernel for
-                     this line.
-
-   The kernel is used only when ppi->PAKEnabled is set and EdgeRow is 0.
-   ApplyPakLowPass() reads the pixels on either side of its centre, so
-   the kernel is skipped for the first pixel of the first fragment and
-   the last pixel of the last fragment.
-
-   Every fragment is visited exactly once.  The previous form handled
-   the "first" and "last" fragments unconditionally outside its loop, so
-   with the kernel enabled it walked two fragments on a plane that is
-   only one fragment wide. */
-static void RowDiffScan( PP_INSTANCE *ppi,
-                         unsigned char * YuvPtr1,
-                         unsigned char * YuvPtr2,
-                         ogg_int16_t   * YUVDiffsPtr,
-                         unsigned char * bits_map_ptr,
-                         signed char   * SgcPtr,
-                         signed char   * DispFragPtr,
-                         unsigned char * FDiffPixels,
-                         ogg_int32_t   * RowDiffsPtr,
-                         unsigned char * ChLocalsPtr, int EdgeRow ){
-
-  ogg_int32_t    i;
-  ogg_int32_t    PakFirst;
-  ogg_int32_t    PakLimit;
-  /* Cannot use kernel if at edge or if PAK disabled */
-  int            UsePak = ( ppi->PAKEnabled && !EdgeRow );
-
-  for ( i = 0; i < ppi->PlaneWidth; i += HFRAGPIXELS ){
-    /* An empty column range switches the kernel off. */
-    PakFirst = 0;
-    PakLimit = 0;
-
-    if ( UsePak ){
-      /* First fragment of the row: leave out its first pixel. */
-      if ( i == 0 )
-        PakFirst = 1;
-
-      /* Last fragment of the row: leave out its last pixel. */
-      if ( i < ppi->PlaneWidth-HFRAGPIXELS )
-        PakLimit = HFRAGPIXELS;
-      else
-        PakLimit = HFRAGPIXELS - 1;
-    }
-
-    RowDiffScanFragment( ppi, YuvPtr1, YuvPtr2, YUVDiffsPtr, bits_map_ptr,
-                         SgcPtr, DispFragPtr, FDiffPixels, RowDiffsPtr,
-                         ChLocalsPtr, PakFirst, PakLimit );
-
-    /* Step every pointer on to the next fragment. */
-    YuvPtr1 += HFRAGPIXELS;
-    YuvPtr2 += HFRAGPIXELS;
-    bits_map_ptr += HFRAGPIXELS;
-    ChLocalsPtr += HFRAGPIXELS;
-    YUVDiffsPtr += HFRAGPIXELS;
-    SgcPtr ++;
-    FDiffPixels ++;
-    DispFragPtr++;
-  }
-}
-
-/* Settles the state of each fragment in a row that is still a
-   CANDIDATE_BLOCK after the difference scan:
-     - if the magnitude of its Sgc score exceeds ppi->BlockSgcThresh it
-       becomes BLOCK_CODED_SGC;
-     - otherwise, if it has no changed pixels, it becomes
-       CANDIDATE_BLOCK_LOW (no longer a candidate for the main tests,
-       but still considered in RowBarEnhBlockMap());
-     - otherwise it is left as CANDIDATE_BLOCK.
-   Fragments in any other state are not touched. */
-static void ConsolidateDiffScanResults( PP_INSTANCE *ppi,
-                                        unsigned char * FDiffPixels,
-                                        signed char * SgcScoresPtr,
-                                        signed char * DispFragPtr ){
-  ogg_int32_t Frag;
-
-  for ( Frag = 0; Frag < ppi->PlaneHFragments; Frag++ ){
-    /* Definite YES and NO cases are ignored. */
-    if ( DispFragPtr[Frag] != CANDIDATE_BLOCK )
-      continue;
-
-    if ( ((ogg_uint32_t)abs(SgcScoresPtr[Frag]) > ppi->BlockSgcThresh) )
-      DispFragPtr[Frag] = BLOCK_CODED_SGC;
-    else if ( FDiffPixels[Frag] == 0 )
-      DispFragPtr[Frag] = CANDIDATE_BLOCK_LOW;
-  }
-}
-
-/* For one pixel line, counts for every changed pixel of a
-   CANDIDATE_BLOCK fragment how many of its surrounding pixels are also
-   flagged in the pixel changed map, and ORs that count into the pixel's
-   changed-locals entry.
-
-   PixelMapPtr  this line within ppi->PixelChangedMap; the lines before
-                and after are found PlaneWidth entries either side,
-                wrapping within the circular buffer.
-   ChLocalsPtr  changed-locals entries for this line.
-   DispFragPtr  state entries for the fragments of this line.
-   RowType      NOT_EDGE_ROW uses both adjacent lines, FIRST_ROW only
-                the line after, any other value only the line before.
-
-   The first pixel of the line has no left-hand neighbours and the last
-   has no right-hand neighbours. */
-static void RowChangedLocalsScan( PP_INSTANCE *ppi,
-                                  unsigned char * PixelMapPtr,
-                                  unsigned char * ChLocalsPtr,
-                                  signed char  * DispFragPtr,
-                                  unsigned char   RowType ){
-
-  unsigned char   Count = 0;
-  unsigned char * Above;
-  unsigned char * Level;
-  unsigned char * Below;
-  ogg_int32_t     FragX, Px, x;
-  ogg_int32_t     LastColumn = ppi->PlaneWidth - 1;
-  int             Interior = ( RowType == NOT_EDGE_ROW );
-  int             UseAbove = ( Interior || (RowType != FIRST_ROW) );
-  int             UseBelow = ( Interior || (RowType == FIRST_ROW) );
-
-  /* Line pointers into the changed map, each starting one entry to the
-     left so that index x is the left neighbour of pixel x, x+1 is the
-     pixel's own column and x+2 the right neighbour. */
-  Above = PixelMapPtr - ppi->PlaneWidth;
-  if ( Above < ppi->PixelChangedMap )
-    Above += ppi->PixelMapCircularBufferSize;
-  Above -= 1;
-
-  Level = PixelMapPtr - 1;
-
-  Below = PixelMapPtr + ppi->PlaneWidth;
-  if ( Below >=
-       (ppi->PixelChangedMap + ppi->PixelMapCircularBufferSize) )
-    Below -= ppi->PixelMapCircularBufferSize;
-  Below -= 1;
-
-  for ( FragX = 0; FragX < ppi->PlaneWidth; FragX += HFRAGPIXELS ){
-    if ( *DispFragPtr == CANDIDATE_BLOCK ){
-      for ( Px = 0; Px < HFRAGPIXELS; Px++ ){
-        x = FragX + Px;
-        Count = 0;
-
-        /* Only pixels that have themselves changed are scored. */
-        if ( Level[x+1] ){
-          /* Left-hand column, absent for the first pixel of the line. */
-          if ( (FragX > 0) || (Px > 0) ){
-            if ( UseAbove ) Count += Above[x];
-            Count += Level[x];
-            if ( UseBelow ) Count += Below[x];
-          }
-
-          /* Directly before and after. */
-          if ( UseAbove ) Count += Above[x+1];
-          if ( UseBelow ) Count += Below[x+1];
-
-          /* Right-hand column, absent for the last pixel of the line. */
-          if ( x < LastColumn ){
-            if ( UseAbove ) Count += Above[x+2];
-            Count += Level[x+2];
-            if ( UseBelow ) Count += Below[x+2];
-          }
-
-          /* Store the number of changed locals */
-          ChLocalsPtr[Px] |= Count;
-        }
-      }
-    }else{
-      /* Fragments with no pixels of interest are skipped as a group of
-         8.  NOTE(review): the statement below is deliberately left
-         unbraced, exactly as it was; if SET8_0 expands to more than
-         one statement only the first is guarded - confirm against the
-         macro definition. */
-      if ( *DispFragPtr > BLOCK_NOT_CODED )
-        SET8_0(ChLocalsPtr);
-    }
-
-    /* Move on to next fragment. */
-    ChLocalsPtr += HFRAGPIXELS;
-    DispFragPtr++;
-  }
-}
-
-static void NoiseScoreRow( PP_INSTANCE *ppi,
-                           unsigned char * PixelMapPtr,
-                           unsigned char * ChLocalsPtr,
-                           ogg_int16_t   * YUVDiffsPtr,
-                           unsigned char * PixelNoiseScorePtr,
-                           ogg_uint32_t  * FragScorePtr,
-                           signed char   * DispFragPtr,
-                           ogg_int32_t   * RowDiffsPtr ){
-  ogg_int32_t i,j;
-  unsigned char  changed_locals = 0;
-  ogg_int32_t  Score;
-  ogg_uint32_t FragScore;
-  ogg_int32_t  AbsDiff;
-
-  /* For each pixel in the row */
-  for ( i = 0; i < ppi->PlaneWidth; i += HFRAGPIXELS ){
-    /* Skip a group of 8 pixels if the assosciated fragment has no
-       pixels of interest. */
-    if ( *DispFragPtr == CANDIDATE_BLOCK ){
-      /* Reset the cumulative fragment score. */
-      FragScore = 0;
-
-      /* Pixels grouped along the row into fragments */
-      for ( j = 0; j < HFRAGPIXELS; j++ ){
-        if ( PixelMapPtr[j] ){
-          AbsDiff = (ogg_int32_t)( abs(YUVDiffsPtr[j]) );
-          changed_locals = ChLocalsPtr[j];
-
-          /* Give this pixel a score based on changed locals and level
-             of its own change. */
-          Score = (1 + ((ogg_int32_t)(changed_locals +
-                                      ppi->NoiseScoreBoostTable[AbsDiff]) -
-                        ppi->NoiseSupLevel));
-
-          /* For no zero scores adjust by a level based score multiplier. */
-          if ( Score > 0 ){
-            Score = ((double)Score *
-                     ppi->AbsDiff_ScoreMultiplierTable[AbsDiff] );
-            if ( Score < 1 )
-              Score = 1;
-          }else{
-            /* Set -ve values to 0 */
-            Score = 0;
-
-            /* If there are no changed locals then clear the pixel
-               changed flag and decrement the pixels changed in
-               fragment count to speed later stages. */
-            if ( changed_locals == 0 ){
-              PixelMapPtr[j] = 0;
-              *RowDiffsPtr -= 1;
-            }
-          }
-
-          /* Update the pixel scores etc. */
-          PixelNoiseScorePtr[j] = (unsigned char)Score;
-          FragScore += (ogg_uint32_t)Score;
-        }
-      }
-
-      /* Add fragment score (with plane correction factor) into main
-         data structure */
-      *FragScorePtr += (ogg_int32_t)(FragScore *
-                                     ppi->YUVPlaneCorrectionFactor);
-
-      /* If score is greater than trip threshold then mark block for update. */
-      if ( *FragScorePtr > ppi->BlockThreshold ){
-        *DispFragPtr = BLOCK_CODED_LOW;
-      }
-    }
-
-    /* Increment the various pointers */
-    FragScorePtr++;
-    DispFragPtr++;
-    PixelNoiseScorePtr += HFRAGPIXELS;
-    PixelMapPtr += HFRAGPIXELS;
-    ChLocalsPtr += HFRAGPIXELS;
-    YUVDiffsPtr += HFRAGPIXELS;
-  }
-}
-
-/* Primary edge scoring for one row of pixels.
-
-   Only fragments still marked CANDIDATE_BLOCK are examined.  Within such
-   a fragment every pixel that has 3 to 5 changed locals is inspected:
-   the neighbouring entries of the changed-locals buffer (previous row,
-   same row, next row) holding a value of 7 or more are counted as "body
-   neighbours".  A pixel with at least one body neighbour gets
-   (body neighbours * BodyNeighbourScore), scaled by
-   ppi->AbsDiff_ScoreMultiplierTable[|diff|] and floored at 1, added to
-   its noise score; the same amount is accumulated for the fragment.
-   The fragment total, scaled by ppi->YUVPlaneCorrectionFactor, is added
-   to *FragScorePtr and the fragment is promoted to BLOCK_CODED_LOW once
-   that score exceeds ppi->BlockThreshold.
-
-   RowType selects which neighbouring rows are read:
-     NOT_EDGE_ROW - the rows above and below,
-     LAST_ROW     - the row above only,
-     other values - handled as the first row: the row below only.
-   The left-hand column of neighbours is skipped for the first pixel of
-   the row and the right-hand column for the last pixel. */
-static void PrimaryEdgeScoreRow( PP_INSTANCE *ppi,
-                                 unsigned char * ChangedLocalsPtr,
-                                 ogg_int16_t   * YUVDiffsPtr,
-                                 unsigned char * PixelNoiseScorePtr,
-                                 ogg_uint32_t  * FragScorePtr,
-                                 signed char   * DispFragPtr,
-                                 unsigned char   RowType ){
-  /* Which neighbouring rows may be consulted for this kind of row. */
-  const int use_above = (RowType == NOT_EDGE_ROW) || (RowType == LAST_ROW);
-  const int use_below = (RowType != LAST_ROW);
-  const ogg_int32_t last_col = ppi->PlaneWidth - 1;
-  unsigned char *above;
-  unsigned char *level;
-  unsigned char *below;
-  ogg_int32_t frag_x;
-  ogg_int32_t x;
-
-  /* Row pointers into the circular changed-locals buffer.  Each one is
-     biased one pixel to the left, so for the pixel in column x the
-     left / centre / right neighbours are row[x], row[x+1], row[x+2]. */
-  above = ChangedLocalsPtr - ppi->PlaneWidth;
-  if ( above < ppi->ChLocals )
-    above += ppi->ChLocalsCircularBufferSize;
-  above -= 1;
-
-  level = ChangedLocalsPtr - 1;
-
-  below = ChangedLocalsPtr + ppi->PlaneWidth;
-  if ( below >= (ppi->ChLocals + ppi->ChLocalsCircularBufferSize) )
-    below -= ppi->ChLocalsCircularBufferSize;
-  below -= 1;
-
-  /* One iteration per fragment; the per-fragment output pointers advance
-     whether or not the fragment is processed. */
-  for ( frag_x = 0; frag_x < ppi->PlaneWidth;
-        frag_x += HFRAGPIXELS, FragScorePtr++, DispFragPtr++ ){
-    ogg_uint32_t frag_total = 0;
-
-    /* Nothing to do unless the fragment is still a candidate. */
-    if ( *DispFragPtr != CANDIDATE_BLOCK )
-      continue;
-
-    for ( x = frag_x; x < frag_x + HFRAGPIXELS; x++ ){
-      const unsigned char n_changed = ChangedLocalsPtr[x];
-      ogg_uint32_t body = 0;
-      ogg_uint32_t magnitude;
-      ogg_int32_t  bonus;
-
-      /* Only pixels with 3..5 changed locals can be edge pixels. */
-      if ( (n_changed <= 2) || (n_changed >= 6) )
-        continue;
-
-      /* Left-hand column (absent for the first pixel of the row). */
-      if ( x > 0 ){
-        if ( use_above && (above[x] >= 7) )
-          body++;
-        if ( level[x] >= 7 )
-          body++;
-        if ( use_below && (below[x] >= 7) )
-          body++;
-      }
-
-      /* Directly above and below. */
-      if ( use_above && (above[x + 1] >= 7) )
-        body++;
-      if ( use_below && (below[x + 1] >= 7) )
-        body++;
-
-      /* Right-hand column (absent for the last pixel of the row). */
-      if ( x < last_col ){
-        if ( use_above && (above[x + 2] >= 7) )
-          body++;
-        if ( level[x + 2] >= 7 )
-          body++;
-        if ( use_below && (below[x + 2] >= 7) )
-          body++;
-      }
-
-      if ( body == 0 )
-        continue;
-
-      /* Score grows with the number of body neighbours and is weighted
-         by the size of the pixel's own change. */
-      magnitude = abs( YUVDiffsPtr[x] );
-      bonus = (ogg_int32_t)
-        ( (double)(body * BodyNeighbourScore) *
-          ppi->AbsDiff_ScoreMultiplierTable[magnitude] );
-      if ( bonus < 1 )
-        bonus = 1;
-
-      PixelNoiseScorePtr[x] += (unsigned char)bonus;
-      frag_total += (ogg_uint32_t)bonus;
-    }
-
-    /* Fold the fragment total (with plane correction factor) into the
-       main score and trip the block if it crosses the threshold. */
-    *FragScorePtr += (ogg_int32_t)(frag_total *
-                                   ppi->YUVPlaneCorrectionFactor);
-    if ( *FragScorePtr > ppi->BlockThreshold )
-      *DispFragPtr = BLOCK_CODED_LOW;
-  }
-}
-
-/* Step a changed-locals pointer one image row up.  The buffer starting
-   at ppi->ChLocals is circular with ppi->ChLocalsCircularBufferSize
-   entries, so a step that lands before its start wraps to the far end. */
-static unsigned char *ChLocalsRowAbove( PP_INSTANCE *ppi,
-                                        unsigned char *ptr ){
-  unsigned char *moved = ptr - ppi->PlaneWidth;
-
-  if ( moved < ppi->ChLocals )
-    moved += ppi->ChLocalsCircularBufferSize;
-  return moved;
-}
-
-/* Step a changed-locals pointer one image row down, wrapping back to the
-   start of the circular buffer when the step runs past its end. */
-static unsigned char *ChLocalsRowBelow( PP_INSTANCE *ppi,
-                                        unsigned char *ptr ){
-  unsigned char *moved = ptr + ppi->PlaneWidth;
-
-  if ( moved >= (ppi->ChLocals + ppi->ChLocalsCircularBufferSize) )
-    moved -= ppi->ChLocalsCircularBufferSize;
-  return moved;
-}
-
-/* Recursive search for the longest chain of "line" pixels leading away
-   from (RowNumber, ColNumber) in the given general direction.
-
-   A pixel is part of a line when it lies inside the plane and has 2 to 5
-   changed locals.  If the pixel qualifies and *line_length is still below
-   ppi->MaxLineSearchLen, the pixel is counted and the search continues
-   into the neighbours allowed for the direction:
-     UP    - up, up-left, up-right
-     LEFT  - up-left, left, down-left
-     RIGHT - up-right, right, down-right
-     DOWN  - down, down-left, down-right
-   Further neighbours are not tried once a branch has reached the maximum
-   length.  On return *line_length holds the length of the longest branch
-   found; it is left untouched when the pixel does not qualify. */
-static void PixelLineSearch( PP_INSTANCE *ppi,
-                             unsigned char * ChangedLocalsPtr,
-                             ogg_int32_t RowNumber,
-                             ogg_int32_t ColNumber,
-                             unsigned char direction,
-                             ogg_uint32_t * line_length ){
-  const int go_up    = ( direction == UP );
-  const int go_down  = ( direction == DOWN );
-  const int go_left  = ( direction == LEFT );
-  const int go_right = ( direction == RIGHT );
-  ogg_uint32_t here_len;
-  ogg_uint32_t best_len;
-  ogg_uint32_t probe_len;
-
-  /* Off the plane: not part of any line.  These tests come first so the
-     pointer is never dereferenced for an out-of-plane position. */
-  if ( (RowNumber < 0) || (RowNumber >= ppi->PlaneHeight) )
-    return;
-  if ( (ColNumber < 0) || (ColNumber >= ppi->PlaneWidth) )
-    return;
-
-  /* Wrong number of changed locals: not a line pixel. */
-  if ( ((*ChangedLocalsPtr) <= 1) || ((*ChangedLocalsPtr) >= 6) )
-    return;
-
-  /* The line is already as long as we care about. */
-  if ( !(*line_length < ppi->MaxLineSearchLen) )
-    return;
-
-  /* Count this pixel. */
-  here_len = *line_length + 1;
-  *line_length = here_len;
-  best_len = here_len;
-
-  /* up */
-  if ( go_up ){
-    probe_len = here_len;
-    PixelLineSearch( ppi, ChLocalsRowAbove( ppi, ChangedLocalsPtr ),
-                     RowNumber - 1, ColNumber, direction, &probe_len );
-    if ( probe_len > best_len )
-      best_len = probe_len;
-  }
-
-  /* up and left */
-  if ( (best_len < ppi->MaxLineSearchLen) && (go_up || go_left) ){
-    probe_len = here_len;
-    PixelLineSearch( ppi, ChLocalsRowAbove( ppi, ChangedLocalsPtr ) - 1,
-                     RowNumber - 1, ColNumber - 1, direction, &probe_len );
-    if ( probe_len > best_len )
-      best_len = probe_len;
-  }
-
-  /* up and right */
-  if ( (best_len < ppi->MaxLineSearchLen) && (go_up || go_right) ){
-    probe_len = here_len;
-    PixelLineSearch( ppi, ChLocalsRowAbove( ppi, ChangedLocalsPtr ) + 1,
-                     RowNumber - 1, ColNumber + 1, direction, &probe_len );
-    if ( probe_len > best_len )
-      best_len = probe_len;
-  }
-
-  /* left */
-  if ( (best_len < ppi->MaxLineSearchLen) && go_left ){
-    probe_len = here_len;
-    PixelLineSearch( ppi, ChangedLocalsPtr - 1,
-                     RowNumber, ColNumber - 1, direction, &probe_len );
-    if ( probe_len > best_len )
-      best_len = probe_len;
-  }
-
-  /* right */
-  if ( (best_len < ppi->MaxLineSearchLen) && go_right ){
-    probe_len = here_len;
-    PixelLineSearch( ppi, ChangedLocalsPtr + 1,
-                     RowNumber, ColNumber + 1, direction, &probe_len );
-    if ( probe_len > best_len )
-      best_len = probe_len;
-  }
-
-  /* down */
-  if ( (best_len < ppi->MaxLineSearchLen) && go_down ){
-    probe_len = here_len;
-    PixelLineSearch( ppi, ChLocalsRowBelow( ppi, ChangedLocalsPtr ),
-                     RowNumber + 1, ColNumber, direction, &probe_len );
-    if ( probe_len > best_len )
-      best_len = probe_len;
-  }
-
-  /* down and left */
-  if ( (best_len < ppi->MaxLineSearchLen) && (go_down || go_left) ){
-    probe_len = here_len;
-    PixelLineSearch( ppi, ChLocalsRowBelow( ppi, ChangedLocalsPtr ) - 1,
-                     RowNumber + 1, ColNumber - 1, direction, &probe_len );
-    if ( probe_len > best_len )
-      best_len = probe_len;
-  }
-
-  /* down and right */
-  if ( (best_len < ppi->MaxLineSearchLen) && (go_down || go_right) ){
-    probe_len = here_len;
-    PixelLineSearch( ppi, ChLocalsRowBelow( ppi, ChangedLocalsPtr ) + 1,
-                     RowNumber + 1, ColNumber + 1, direction, &probe_len );
-    if ( probe_len > best_len )
-      best_len = probe_len;
-  }
-
-  /* Report the longest branch found through this pixel. */
-  *line_length = best_len;
-}
-
-/* Returns the line-length score for the pixel at (RowNumber, ColNumber).
-
-   The pixel is first tested as part of a roughly vertical line: the
-   lengths found by PixelLineSearch() in the UP and DOWN directions are
-   joined (the start pixel is counted by both searches, hence the -1) and
-   capped at ppi->MaxLineSearchLen.  If that does not already give a
-   maximum-length line, the same is done for LEFT and RIGHT.  The longer
-   of the two results indexes LineLengthScores[] and that entry is
-   returned as an unsigned char.
-
-   When the start pixel itself does not qualify, both searches of an axis
-   return 0.  The lengths are unsigned, so an unguarded "0 + 0 - 1" would
-   wrap to a huge value and be capped to the maximum length, scoring a
-   non-line pixel as the longest possible line.  The joined length is
-   therefore only decremented when it is non-zero. */
-static unsigned char LineSearchScorePixel( PP_INSTANCE *ppi,
-                                           unsigned char * ChangedLocalsPtr,
-                                           ogg_int32_t RowNumber,
-                                           ogg_int32_t ColNumber ){
-  ogg_uint32_t line_length = 0;
-  ogg_uint32_t line_length2 = 0;
-  ogg_uint32_t line_length_score = 0;
-  ogg_uint32_t tmp_line_length = 0;
-  ogg_uint32_t tmp_line_length2 = 0;
-
-  /* Look UP */
-  PixelLineSearch( ppi, ChangedLocalsPtr, RowNumber,
-                   ColNumber, UP, &tmp_line_length );
-
-  if ( tmp_line_length < ppi->MaxLineSearchLen ){
-    /* Look DOWN and join the two halves. */
-    PixelLineSearch( ppi, ChangedLocalsPtr, RowNumber,
-                     ColNumber, DOWN, &tmp_line_length2 );
-    line_length = tmp_line_length + tmp_line_length2;
-    if ( line_length > 0 )
-      line_length -= 1;   /* start pixel was counted twice */
-
-    if ( line_length > ppi->MaxLineSearchLen )
-      line_length = ppi->MaxLineSearchLen;
-  }else{
-    line_length = tmp_line_length;
-  }
-
-  /* If no max length line found then look left and right */
-  if ( line_length < ppi->MaxLineSearchLen ){
-    tmp_line_length = 0;
-    tmp_line_length2 = 0;
-
-    PixelLineSearch( ppi, ChangedLocalsPtr, RowNumber,
-                     ColNumber, LEFT, &tmp_line_length );
-    if ( tmp_line_length < ppi->MaxLineSearchLen ){
-      PixelLineSearch( ppi, ChangedLocalsPtr, RowNumber,
-                       ColNumber, RIGHT, &tmp_line_length2 );
-      line_length2 = tmp_line_length + tmp_line_length2;
-      if ( line_length2 > 0 )
-        line_length2 -= 1;   /* start pixel was counted twice */
-
-      if ( line_length2 > ppi->MaxLineSearchLen )
-        line_length2 = ppi->MaxLineSearchLen;
-    }else{
-      line_length2 = tmp_line_length;
-    }
-  }
-
-  /* Take the largest line length */
-  if ( line_length2 > line_length )
-    line_length = line_length2;
-
-  /* Create line length score */
-  line_length_score = LineLengthScores[line_length];
-
-  return (unsigned char)line_length_score;
-}
-
-/* Line-search scoring for one row of pixels.
-
-   Only fragments still marked CANDIDATE_BLOCK are examined.  Within such
-   a fragment, each pixel that has 2 to 5 changed locals and whose noise
-   score is still below ppi->LineSearchTripTresh is passed to
-   LineSearchScorePixel().  A non-zero result is scaled by
-   ppi->AbsDiff_ScoreMultiplierTable[|diff|], floored at 1, and added to
-   both the pixel's noise score and the running fragment total.  The
-   fragment total, scaled by ppi->YUVPlaneCorrectionFactor, is added to
-   *FragScorePtr and the fragment is promoted to BLOCK_CODED_LOW once
-   that score exceeds ppi->BlockThreshold. */
-static void LineSearchScoreRow( PP_INSTANCE *ppi,
-                                unsigned char * ChangedLocalsPtr,
-                                ogg_int16_t   * YUVDiffsPtr,
-                                unsigned char * PixelNoiseScorePtr,
-                                ogg_uint32_t  * FragScorePtr,
-                                signed char   * DispFragPtr,
-                                ogg_int32_t     RowNumber ){
-  ogg_int32_t frag_x;
-  ogg_int32_t x;
-
-  /* One iteration per fragment; the per-fragment output pointers advance
-     whether or not the fragment is processed.  The per-pixel arrays are
-     indexed by absolute column. */
-  for ( frag_x = 0; frag_x < ppi->PlaneWidth;
-        frag_x += HFRAGPIXELS, FragScorePtr++, DispFragPtr++ ){
-    ogg_uint32_t frag_total = 0;
-
-    /* Nothing to do unless the fragment is still a candidate. */
-    if ( *DispFragPtr != CANDIDATE_BLOCK )
-      continue;
-
-    for ( x = frag_x; x < frag_x + HFRAGPIXELS; x++ ){
-      const unsigned char n_changed = ChangedLocalsPtr[x];
-      ogg_uint32_t magnitude;
-      ogg_int32_t  bonus;
-
-      /* Is the pixel a suitable candidate for edge enhancement? */
-      if ( (n_changed <= 1) || (n_changed >= 6) )
-        continue;
-      if ( !(PixelNoiseScorePtr[x] < ppi->LineSearchTripTresh) )
-        continue;
-
-      bonus = (ogg_int32_t)
-        LineSearchScorePixel( ppi, &ChangedLocalsPtr[x], RowNumber, x );
-      if ( !bonus )
-        continue;
-
-      /* Weight the line score by the size of the pixel's own change. */
-      magnitude = abs( YUVDiffsPtr[x] );
-      bonus = (ogg_int32_t)
-        ( (double)bonus * ppi->AbsDiff_ScoreMultiplierTable[magnitude] );
-      if ( bonus < 1 )
-        bonus = 1;
-
-      PixelNoiseScorePtr[x] += (unsigned char)bonus;
-      frag_total += (ogg_uint32_t)bonus;
-    }
-
-    /* Fold the fragment total (with plane correction factor) into the
-       main score and trip the block if it crosses the threshold. */
-    *FragScorePtr +=
-      (ogg_int32_t)(frag_total * ppi->YUVPlaneCorrectionFactor);
-    if ( *FragScorePtr > ppi->BlockThreshold )
-      *DispFragPtr = BLOCK_CODED_LOW;
-  }
-}
-
-/* Copies one row of fragments from ppi->ScanConfig.Yuv1ptr into
-   ppi->ScanConfig.SrfWorkSpcPtr.
-
-   BlockMapIndex is the index of the first fragment of the row.  A
-   fragment is copied when its entry in ppi->ScanDisplayFragments is
-   above BLOCK_NOT_CODED, or when its entry in ppi->PrevFragments[0] is
-   BLOCK_CODED.  Each copied fragment is VFRAGPIXELS lines of
-   HFRAGPIXELS bytes, with consecutive lines ppi->PlaneWidth bytes apart
-   in both buffers. */
-static void RowCopy( PP_INSTANCE *ppi, ogg_uint32_t BlockMapIndex ){
-
-  ogg_uint32_t   i,j;
-
-  ogg_uint32_t   PixelIndex = ppi->ScanPixelIndexTable[BlockMapIndex];
-  signed char   * BlockMapPtr = &ppi->ScanDisplayFragments[BlockMapIndex];
-  signed char   * PrevFragmentsPtr = &ppi->PrevFragments[0][BlockMapIndex];
-
-  unsigned char  * SourcePtr;
-  unsigned char  * DestPtr;
-
-  /* Copy pixels from changed blocks back to reference frame. */
-  for ( i = 0; i < (ogg_uint32_t)ppi->PlaneHFragments; i ++ ){
-    /* If the fragment is marked for update or was recently marked
-       for update (PrevFragmentsPtr[i]) */
-    if ( (BlockMapPtr[i] > BLOCK_NOT_CODED) ||
-         (PrevFragmentsPtr[i] == BLOCK_CODED) ){
-      /* Set up the various pointers required. */
-      SourcePtr = &ppi->ScanConfig.Yuv1ptr[PixelIndex];
-      DestPtr = &ppi->ScanConfig.SrfWorkSpcPtr[PixelIndex];
-
-      /* For each row of the block */
-      for ( j = 0; j < VFRAGPIXELS; j++ ){
-        /* Copy one fragment-wide line unaltered.  The width is the same
-           HFRAGPIXELS that PixelIndex is stepped by below, rather than
-           a separately hard-coded byte count. */
-        memcpy( DestPtr, SourcePtr, HFRAGPIXELS );
-
-        /* Increment pointers for next line in the block */
-        SourcePtr += ppi->PlaneWidth;
-        DestPtr += ppi->PlaneWidth;
-      }
-    }
-
-    /* Increment pixel index for next block. */
-    PixelIndex += HFRAGPIXELS;
-  }
-}
-
-/* Builds the "bar" block map for one row of fragments.
-
-   BarBlockMapPtr[0 .. PlaneHFragments-1] is first reset to
-   BLOCK_NOT_CODED.  Then every fragment whose entry in
-   UpdatedBlockMapPtr is <= CANDIDATE_BLOCK is marked BLOCK_CODED_BAR
-   when at least one of its (up to eight) immediate neighbours in the
-   updated map is above BLOCK_NOT_CODED.
-
-   UpdatedBlockMapPtr points at the first fragment of row RowNumber; the
-   rows above and below are reached at -/+ ppi->PlaneHFragments.
-
-   Neighbours outside the plane are never read.  Which neighbours exist
-   is decided independently per side, so a plane that is one fragment
-   wide or one fragment high is handled without touching a non-existent
-   column or row. */
-static void RowBarEnhBlockMap( PP_INSTANCE *ppi,
-                               signed char   * UpdatedBlockMapPtr,
-                               signed char   * BarBlockMapPtr,
-                               ogg_uint32_t RowNumber ){
-  const int row_len = ppi->PlaneHFragments;
-  const int has_above = ( RowNumber > 0 );
-  const int has_below =
-    ( (RowNumber + 1) < (ogg_uint32_t)ppi->PlaneVFragments );
-  int i;
-
-  /* Start by blanking the row in the bar block map structure. */
-  memset( BarBlockMapPtr, BLOCK_NOT_CODED, ppi->PlaneHFragments );
-
-  /* For each fragment in the row. */
-  for ( i = 0; i < row_len; i ++ ){
-    const signed char *here = &UpdatedBlockMapPtr[i];
-    const int has_left  = ( i > 0 );
-    const int has_right = ( (i + 1) < row_len );
-    int near_coded;
-
-    /* Only CANDIDATE_BLOCK or lower entries are of interest; uncoded or
-       coded blocks are ignored. */
-    if ( *here > CANDIDATE_BLOCK )
-      continue;
-
-    /* Same row: left and right. */
-    near_coded =
-      ( has_left  && (here[-1] > BLOCK_NOT_CODED) ) ||
-      ( has_right && (here[ 1] > BLOCK_NOT_CODED) );
-
-    /* Row above: left, centre, right. */
-    if ( !near_coded && has_above ){
-      const signed char *up = here - row_len;
-
-      near_coded =
-        ( up[0] > BLOCK_NOT_CODED ) ||
-        ( has_left  && (up[-1] > BLOCK_NOT_CODED) ) ||
-        ( has_right && (up[ 1] > BLOCK_NOT_CODED) );
-    }
-
-    /* Row below: left, centre, right. */
-    if ( !near_coded && has_below ){
-      const signed char *down = here + row_len;
-
-      near_coded =
-        ( down[0] > BLOCK_NOT_CODED ) ||
-        ( has_left  && (down[-1] > BLOCK_NOT_CODED) ) ||
-        ( has_right && (down[ 1] > BLOCK_NOT_CODED) );
-    }
-
-    if ( near_coded )
-      BarBlockMapPtr[i] = BLOCK_CODED_BAR;
-  }
-}
-
-/* Merges one row of the bar block map into the updated block map: every
-   entry of BarBlockMapPtr that is above BLOCK_NOT_CODED overwrites the
-   entry at the same position in UpdatedBlockMapPtr.  Other entries are
-   left as they are.  The row is ppi->PlaneHFragments entries long. */
-static void BarCopyBack( PP_INSTANCE *ppi,
-                         signed char  * UpdatedBlockMapPtr,
-                         signed char  * BarBlockMapPtr ){
-  ogg_int32_t frag = 0;
-
-  while ( frag < ppi->PlaneHFragments ){
-    const signed char bar_state = BarBlockMapPtr[frag];
-
-    if ( bar_state > BLOCK_NOT_CODED )
-      UpdatedBlockMapPtr[frag] = bar_state;
-
-    frag++;
-  }
-}
-
-static void AnalysePlane( PP_INSTANCE *ppi,
-                          unsigned char * PlanePtr0,
-                          unsigned char * PlanePtr1,
-                          ogg_uint32_t FragArrayOffset,
-                          ogg_uint32_t PWidth,
-                          ogg_uint32_t PHeight,
-                          ogg_uint32_t PStride ) {
-  unsigned char  * RawPlanePtr0;
-  unsigned char  * RawPlanePtr1;
-
-  ogg_int16_t  * YUVDiffsPtr;
-  ogg_int16_t  * YUVDiffsPtr1;
-  ogg_int16_t  * YUVDiffsPtr2;
-
-  ogg_uint32_t FragIndex;
-  ogg_uint32_t ScoreFragIndex1;
-  ogg_uint32_t ScoreFragIndex2;
-  ogg_uint32_t ScoreFragIndex3;
-  ogg_uint32_t ScoreFragIndex4;
-
-  int   UpdatedOrCandidateBlocks = 0;
-
-  unsigned char  * ChLocalsPtr0;
-  unsigned char  * ChLocalsPtr1;
-  unsigned char  * ChLocalsPtr2;
-
-  unsigned char  * PixelsChangedPtr0;
-  unsigned char  * PixelsChangedPtr1;
-
-  unsigned char  * PixelScoresPtr1;
-  unsigned char  * PixelScoresPtr2;
-
-  signed char   * DispFragPtr0;
-  signed char   * DispFragPtr1;
-  signed char   * DispFragPtr2;
-
-  ogg_uint32_t * FragScoresPtr1;
-  ogg_uint32_t * FragScoresPtr2;
-
-  ogg_int32_t  * RowDiffsPtr;
-  ogg_int32_t  * RowDiffsPtr1;
-  ogg_int32_t  * RowDiffsPtr2;
-
-  ogg_int32_t  i,j;
-
-  ogg_int32_t  RowNumber1;
-  ogg_int32_t  RowNumber2;
-  ogg_int32_t  RowNumber3;
-  ogg_int32_t  RowNumber4;
-
-  int   EdgeRow;
-  ogg_int32_t  LineSearchRowNumber = 0;
-
-  /* Variables used as temporary stores for frequently used values. */
-  ogg_int32_t  Row0Mod3;
-  ogg_int32_t  Row1Mod3;
-  ogg_int32_t  Row2Mod3;
-  ogg_int32_t  BlockRowPixels;
-
-  /* Set pixel difference threshold */
-  if ( FragArrayOffset == 0 ){
-    /* Luminance */
-    ppi->LevelThresh = (int)ppi->SgcLevelThresh;
-    ppi->NegLevelThresh = -ppi->LevelThresh;
-
-    ppi->SrfThresh = (int)ppi->SRFGreyThresh;
-    ppi->NegSrfThresh = -ppi->SrfThresh;
-
-    /* Scores correction for Y pixels. */
-    ppi->YUVPlaneCorrectionFactor = 1.0;
-
-    ppi->BlockThreshold = ppi->PrimaryBlockThreshold;
-    ppi->BlockSgcThresh = ppi->SgcThresh;
-  }else{
-    /* Chrominance */
-    ppi->LevelThresh = (int)ppi->SuvcLevelThresh;
-    ppi->NegLevelThresh = -ppi->LevelThresh;
-
-    ppi->SrfThresh = (int)ppi->SRFColThresh;
-    ppi->NegSrfThresh = -ppi->SrfThresh;
-
-    /* Scores correction for UV pixels. */
-    ppi->YUVPlaneCorrectionFactor = 1.5;
-
-    /* Block thresholds differ for subsampled U and V blocks */
-    ppi->BlockThreshold =
-      (ppi->PrimaryBlockThreshold / ppi->UVBlockThreshCorrection);
-    ppi->BlockSgcThresh =
-      (ppi->SgcThresh / ppi->UVSgcCorrection);
-  }
-
-  /* Initialise the SRF thresh table and pointer. */
-  memset( ppi->SrfThreshTable, 1, 512 );
-  for ( i = ppi->NegSrfThresh; i <= ppi->SrfThresh; i++ )
-    ppi->SrfThreshTable[i+255] = 0;
-
-  /* Initialise the PAK thresh table. */
-  for ( i = -255; i <= 255; i++ )
-    if ( ppi->SrfThreshTable[i+255] &&
-         (i <= ppi->HighChange) &&
-         (i >= ppi->NegHighChange) )
-      ppi->SrfPakThreshTable[i+255] = 1;
-    else
-      ppi->SrfPakThreshTable[i+255] = 0;
-
-  /* Initialise the SGc lookup table */
-  for ( i = -255; i <= 255; i++ ){
-    if ( i <= ppi->NegLevelThresh )
-      ppi->SgcThreshTable[i+255] = (unsigned char) -1;
-    else if ( i >= ppi->LevelThresh )
-      ppi->SgcThreshTable[i+255] = 1;
-    else
-      ppi->SgcThreshTable[i+255] = 0;
-  }
-
-  /* Set up plane dimension variables */
-  ppi->PlaneHFragments = PWidth / HFRAGPIXELS;
-  ppi->PlaneVFragments = PHeight / VFRAGPIXELS;
-  ppi->PlaneWidth = PWidth;
-  ppi->PlaneHeight = PHeight;
-  ppi->PlaneStride = PStride;
-
-  /* Set up local pointers into the raw image data. */
-  RawPlanePtr0 = PlanePtr0;
-  RawPlanePtr1 = PlanePtr1;
-
-  /* Note size and end points for circular buffers. */
-  ppi->YuvDiffsCircularBufferSize = YDIFF_CB_ROWS * ppi->PlaneWidth;
-  ppi->ChLocalsCircularBufferSize = CHLOCALS_CB_ROWS * ppi->PlaneWidth;
-  ppi->PixelMapCircularBufferSize = PMAP_CB_ROWS * ppi->PlaneWidth;
-
-  /* Set high change thresh where PAK not needed */
-  ppi->HighChange = ppi->SrfThresh * 4;
-  ppi->NegHighChange = -ppi->HighChange;
-
-  /* Set up row difference pointers. */
-  RowDiffsPtr = ppi->RowChangedPixels;
-  RowDiffsPtr1 = ppi->RowChangedPixels;
-  RowDiffsPtr2 = ppi->RowChangedPixels;
-
-  BlockRowPixels = ppi->PlaneWidth * VFRAGPIXELS;
-
-  for ( i = 0; i < (ppi->PlaneVFragments + 4); i++ ){
-    RowNumber1 = (i - 1);
-    RowNumber2 = (i - 2);
-    RowNumber3 = (i - 3);
-    RowNumber4 = (i - 4);
-
-    /* Pre calculate some frequently used values */
-    Row0Mod3 = i % 3;
-    Row1Mod3 = RowNumber1 % 3;
-    Row2Mod3 = RowNumber2 % 3;
-
-    /*  For row diff scan last two iterations are invalid */
-    if ( i < ppi->PlaneVFragments ){
-      FragIndex = (i * ppi->PlaneHFragments) + FragArrayOffset;
-      YUVDiffsPtr = &ppi->yuv_differences[Row0Mod3 * BlockRowPixels];
-
-      PixelsChangedPtr0 = (&ppi->PixelChangedMap[Row0Mod3 * BlockRowPixels]);
-      DispFragPtr0 =  &ppi->ScanDisplayFragments[FragIndex];
-
-      ChLocalsPtr0 = (&ppi->ChLocals[Row0Mod3 * BlockRowPixels]);
-
-    }
-
-    /* Set up the changed locals pointer to trail behind by one row of
-       fragments. */
-    if ( i > 0 ){
-      /* For last iteration the ch locals and noise scans are invalid */
-      if ( RowNumber1 < ppi->PlaneVFragments ){
-        ScoreFragIndex1 = (RowNumber1 * ppi->PlaneHFragments) +
-          FragArrayOffset;
-
-        ChLocalsPtr1 = &ppi->ChLocals[Row1Mod3 * BlockRowPixels];
-        PixelsChangedPtr1 =
-          &ppi->PixelChangedMap[(Row1Mod3) * BlockRowPixels];
-
-        PixelScoresPtr1 = &ppi->PixelScores[(RowNumber1 % 4) * BlockRowPixels];
-
-        YUVDiffsPtr1 = &ppi->yuv_differences[Row1Mod3 * BlockRowPixels];
-        FragScoresPtr1 = &ppi->FragScores[ScoreFragIndex1];
-        DispFragPtr1 = &ppi->ScanDisplayFragments[ScoreFragIndex1];
-
-      }
-
-      if ( RowNumber2 >= 0 ){
-        ScoreFragIndex2 = (RowNumber2 * ppi->PlaneHFragments) +
-          FragArrayOffset;
-        ChLocalsPtr2 = (&ppi->ChLocals[Row2Mod3 * BlockRowPixels]);
-        YUVDiffsPtr2 = &ppi->yuv_differences[Row2Mod3 * BlockRowPixels];
-
-        PixelScoresPtr2 = &ppi->PixelScores[(RowNumber2 % 4) * BlockRowPixels];
-
-        FragScoresPtr2 =  &ppi->FragScores[ScoreFragIndex2];
-        DispFragPtr2 = &ppi->ScanDisplayFragments[ScoreFragIndex2];
-      }else{
-        ChLocalsPtr2 = NULL;
-      }
-    }else{
-      ChLocalsPtr1 = NULL;
-      ChLocalsPtr2 = NULL;
-    }
-
-    /* Fast break out test for obvious yes and no cases in this row of
-       blocks */
-    if ( i < ppi->PlaneVFragments ){
-      dsp_save_fpu (ppi->dsp);
-      UpdatedOrCandidateBlocks =
-        RowSadScan( ppi, RawPlanePtr0, RawPlanePtr1, DispFragPtr0 );
-      UpdatedOrCandidateBlocks |=
-        ColSadScan( ppi, RawPlanePtr0, RawPlanePtr1, DispFragPtr0 );
-      dsp_restore_fpu (ppi->dsp);
-    }else{
-      /* Make sure we still call other functions if RowSadScan() disabled */
-      UpdatedOrCandidateBlocks = 1;
-    }
-
-    /* Consolidation and fast break out tests at Row 1 level */
-    if ( (i > 0) && (RowNumber1 < ppi->PlaneVFragments) ){
-      /* Mark as coded any candidate block that lies adjacent to a
-         coded block. */
-      SadPass2( ppi, RowNumber1, DispFragPtr1 );
-
-      /* Check results of diff scan in last set of blocks. */
-      /* Eliminate NO cases and add in +SGC cases */
-      ConsolidateDiffScanResults( ppi, &ppi->FragDiffPixels[ScoreFragIndex1],
-                                  &ppi->SameGreyDirPixels[ScoreFragIndex1],
-                                  DispFragPtr1
-                                  );
-    }
-
-    for ( j = 0; j < VFRAGPIXELS; j++ ){
-      /* Last two iterations do not apply */
-      if ( i < ppi->PlaneVFragments ){
-        /* Is the current fragment at an edge. */
-        EdgeRow = ( ( (i == 0) && (j == 0) ) ||
-                    ( (i == (ppi->PlaneVFragments - 1)) &&
-                      (j == (VFRAGPIXELS - 1)) ) );
-
-        /* Clear the arrays that will be used for the changed pixels maps */
-        memset( PixelsChangedPtr0, 0, ppi->PlaneWidth );
-
-        /* Difference scan and map each row */
-        if ( UpdatedOrCandidateBlocks ){
-          /* Scan the row for interesting differences */
-          /* Also clear the array that will be used for changed locals map */
-          RowDiffScan( ppi, RawPlanePtr0, RawPlanePtr1,
-                       YUVDiffsPtr, PixelsChangedPtr0,
-                       &ppi->SameGreyDirPixels[FragIndex],
-                       DispFragPtr0, &ppi->FragDiffPixels[FragIndex],
-                       RowDiffsPtr, ChLocalsPtr0, EdgeRow);
-        }else{
-          /* Clear the array that will be used for changed locals map */
-          memset( ChLocalsPtr0, 0, ppi->PlaneWidth );
-        }
-
-        /* The actual image plane pointers must be incremented by
-           stride as this may be different (more) than the plane
-           width. Our own internal buffers use ppi->PlaneWidth. */
-        RawPlanePtr0 += ppi->PlaneStride;
-        RawPlanePtr1 += ppi->PlaneStride;
-        PixelsChangedPtr0 += ppi->PlaneWidth;
-        ChLocalsPtr0 += ppi->PlaneWidth;
-        YUVDiffsPtr += ppi->PlaneWidth;
-        RowDiffsPtr++;
-      }
-
-      /* Run behind calculating the changed locals data and noise scores. */
-      if ( ChLocalsPtr1 != NULL ){
-        /* Last few iterations do not apply */
-        if ( RowNumber1 < ppi->PlaneVFragments ){
-          /* Blank the next row in the pixel scores data structure. */
-          memset( PixelScoresPtr1, 0, ppi->PlaneWidth );
-
-          /* Don't bother doing anything if there are no changed
-             pixels in this row */
-          if ( *RowDiffsPtr1 ){
-            /* Last valid row is a special case */
-            if ( i < ppi->PlaneVFragments )
-              RowChangedLocalsScan( ppi, PixelsChangedPtr1, ChLocalsPtr1,
-                                    DispFragPtr1,
-                                    ( (((i-1)==0) && (j==0)) ?
-                                      FIRST_ROW : NOT_EDGE_ROW) );
-            else
-              RowChangedLocalsScan( ppi, PixelsChangedPtr1, ChLocalsPtr1,
-                                    DispFragPtr1,
-                                    ((j==(VFRAGPIXELS-1)) ?
-                                     LAST_ROW : NOT_EDGE_ROW) );
-
-            NoiseScoreRow( ppi, PixelsChangedPtr1, ChLocalsPtr1, YUVDiffsPtr1,
-                           PixelScoresPtr1, FragScoresPtr1, DispFragPtr1,
-                           RowDiffsPtr1 );
-          }
-
-          ChLocalsPtr1 += ppi->PlaneWidth;
-          PixelsChangedPtr1 += ppi->PlaneWidth;
-          YUVDiffsPtr1 += ppi->PlaneWidth;
-          PixelScoresPtr1 += ppi->PlaneWidth;
-          RowDiffsPtr1 ++;
-        }
-
-        /* Run edge enhancement algorithms */
-        if ( RowNumber2 < ppi->PlaneVFragments ){
-          if ( ChLocalsPtr2 != NULL ){
-            /* Don't bother doing anything if there are no changed
-               pixels in this row */
-            if ( *RowDiffsPtr2 ){
-              if ( RowNumber1 < ppi->PlaneVFragments ){
-                PrimaryEdgeScoreRow( ppi, ChLocalsPtr2, YUVDiffsPtr2,
-                                     PixelScoresPtr2, FragScoresPtr2,
-                                     DispFragPtr2,
-                                     ( (((i-2)==0) && (j==0)) ?
-                                       FIRST_ROW : NOT_EDGE_ROW)  );
-              }else{
-                /* Edge enhancement */
-                PrimaryEdgeScoreRow( ppi, ChLocalsPtr2, YUVDiffsPtr2,
-                                     PixelScoresPtr2, FragScoresPtr2,
-                                     DispFragPtr2,
-                                     ((j==(VFRAGPIXELS-1)) ?
-                                      LAST_ROW : NOT_EDGE_ROW) );
-              }
-
-              /* Recursive line search */
-              LineSearchScoreRow( ppi, ChLocalsPtr2, YUVDiffsPtr2,
-                                  PixelScoresPtr2, FragScoresPtr2,
-                                  DispFragPtr2,
-                                  LineSearchRowNumber );
-            }
-
-            ChLocalsPtr2 += ppi->PlaneWidth;
-            YUVDiffsPtr2 += ppi->PlaneWidth;
-            PixelScoresPtr2 += ppi->PlaneWidth;
-            LineSearchRowNumber += 1;
-            RowDiffsPtr2 ++;
-          }
-        }
-      }
-    }
-
-    /* BAR algorithm */
-    if ( (RowNumber3 >= 0) && (RowNumber3 < ppi->PlaneVFragments) ){
-      ScoreFragIndex3 = (RowNumber3 * ppi->PlaneHFragments) + FragArrayOffset;
-      RowBarEnhBlockMap(ppi,
-                        &ppi->ScanDisplayFragments[ScoreFragIndex3],
-                        &ppi->BarBlockMap[(RowNumber3 % 3) *
-                                         ppi->PlaneHFragments],
-                        RowNumber3 );
-    }
-
-    /* BAR copy back and "ppi->SRF filtering" or "pixel copy back" */
-    if ( (RowNumber4 >= 0) && (RowNumber4 < ppi->PlaneVFragments) ){
-      /* BAR copy back stage must lag by one more row to avoid BAR blocks
-         being used in BAR decisions. */
-      ScoreFragIndex4 = (RowNumber4 * ppi->PlaneHFragments) + FragArrayOffset;
-
-      BarCopyBack(ppi, &ppi->ScanDisplayFragments[ScoreFragIndex4],
-                  &ppi->BarBlockMap[(RowNumber4 % 3) * ppi->PlaneHFragments]);
-
-      /* Copy over the data from any blocks marked for update into the
-         output buffer. */
-      RowCopy(ppi, ScoreFragIndex4);
-    }
-  }
-}
-
-ogg_uint32_t YUVAnalyseFrame( PP_INSTANCE *ppi, ogg_uint32_t * KFIndicator ){
-  /* Run the scan over one frame: reset the map arrays, derive the plane
-     geometry and plane pointers from ppi->ScanConfig, analyse the U, V and
-     then Y planes, and build the output fragment map.
-     ppi:         scan state; many of its fields are overwritten here.
-     KFIndicator: out parameter, receives ppi->KFIndicator.
-     Returns ppi->OutputBlocksUpdated. */
-  /* Initialise the map arrays. */
-  InitScanMapArrays(ppi);
-
-  /* If the motion level in the previous frame was high then adjust
-     the high and low SAD thresholds to speed things up.
-     NOTE(review): no adjustment is visible here; the "modified" thresholds
-     are plain copies of the configured ones. */
-  ppi->ModifiedGrpLowSadThresh = ppi->GrpLowSadThresh;
-  ppi->ModifiedGrpHighSadThresh = ppi->GrpHighSadThresh;
-
-
-  /* Set up the internal plane height and width variables.
-     The chroma planes are taken to be half the frame width and height. */
-  ppi->VideoYPlaneWidth = ppi->ScanConfig.VideoFrameWidth;
-  ppi->VideoYPlaneHeight = ppi->ScanConfig.VideoFrameHeight;
-  ppi->VideoUVPlaneWidth = ppi->ScanConfig.VideoFrameWidth / 2;
-  ppi->VideoUVPlaneHeight = ppi->ScanConfig.VideoFrameHeight / 2;
-
-  /* To start with the strides will be set from the widths */
-  ppi->VideoYPlaneStride = ppi->VideoYPlaneWidth;
-  ppi->VideoUPlaneStride = ppi->VideoUVPlaneWidth;
-  ppi->VideoVPlaneStride = ppi->VideoUVPlaneWidth;
-
-  /* Set up the plane pointers.
-     In both buffers U is addressed YFramePixels past the start and V a
-     further UVFramePixels past that. */
-  ppi->YPlanePtr0 = ppi->ScanConfig.Yuv0ptr;
-  ppi->YPlanePtr1 = ppi->ScanConfig.Yuv1ptr;
-  ppi->UPlanePtr0 = (ppi->ScanConfig.Yuv0ptr + ppi->YFramePixels);
-  ppi->UPlanePtr1 = (ppi->ScanConfig.Yuv1ptr + ppi->YFramePixels);
-  ppi->VPlanePtr0 = (ppi->ScanConfig.Yuv0ptr + ppi->YFramePixels +
-                     ppi->UVFramePixels);
-  ppi->VPlanePtr1 = (ppi->ScanConfig.Yuv1ptr + ppi->YFramePixels +
-                     ppi->UVFramePixels);
-
-  /* Check previous frame lists and if necessary mark extra blocks for
-     update. */
-  SetFromPrevious(ppi);
-
-  /* Analyse the U and V planes.
-     The fourth argument is the plane's starting offset into the fragment
-     arrays: U starts after the Y fragments, V after the Y and U fragments. */
-  AnalysePlane( ppi, ppi->UPlanePtr0, ppi->UPlanePtr1,
-                ppi->ScanYPlaneFragments, ppi->VideoUVPlaneWidth,
-                ppi->VideoUVPlaneHeight, ppi->VideoUPlaneStride );
-  AnalysePlane( ppi, ppi->VPlanePtr0, ppi->VPlanePtr1,
-                (ppi->ScanYPlaneFragments + ppi->ScanUVPlaneFragments),
-                ppi->VideoUVPlaneWidth, ppi->VideoUVPlaneHeight,
-                ppi->VideoVPlaneStride );
-
-  /* Now analyse the Y plane (fragment offset 0). */
-  AnalysePlane( ppi, ppi->YPlanePtr0, ppi->YPlanePtr1, 0,
-                ppi->VideoYPlaneWidth, ppi->VideoYPlaneHeight,
-                ppi->VideoYPlaneStride );
-
-  /* Update the list of previous frame block updates. */
-  UpdatePreviousBlockLists(ppi);
-
-  /* Create an output block map for the calling process. */
-  CreateOutputDisplayMap( ppi, ppi->ScanDisplayFragments,
-                          ppi->PrevFragments[0],
-                          ppi->ScanConfig.disp_fragments );
-
-  /* Set the candidate key frame indicator (0-100) */
-  *KFIndicator = ppi->KFIndicator;
-
-  /* Return the normalised block count (this is actually a motion
-     level weighting not a true block count). */
-  return ppi->OutputBlocksUpdated;
-}
-

+ 0 - 40
Engine/lib/libtheora/lib/enc/toplevel_lookup.h

@@ -1,40 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: toplevel_lookup.h 13884 2007-09-22 08:38:10Z giles $
-
- ********************************************************************/
-
-#include "codec_internal.h"
-
-const ogg_uint32_t PriorKeyFrameWeight[KEY_FRAME_CONTEXT] = { 1,2,3,4,5 }; /* linearly increasing weights 1..5 */
-
-/* Data structures controlling addition of residue blocks.
-   ResidueErrorThresh holds 64 thresholds that never increase with the index,
-   falling from 750 at index 0 to 130 at the end.
-   NOTE(review): presumably indexed by quantiser level -- confirm against the
-   code that uses Q_TABLE_SIZE. */
-const ogg_uint32_t ResidueErrorThresh[Q_TABLE_SIZE] =  {
-  750, 700, 650, 600, 590, 580, 570, 560,
-  550, 540, 530, 520, 510, 500, 490, 480,
-  470, 460, 450, 440, 430, 420, 410, 400,
-  390, 380, 370, 360, 350, 340, 330, 320,
-  310, 300, 290, 280, 270, 260, 250, 245,
-  240, 235, 230, 225, 220, 215, 210, 205,
-  200, 195, 190, 185, 180, 175, 170, 165,
-  160, 155, 150, 145, 140, 135, 130, 130 };
-const ogg_uint32_t ResidueBlockFactor[Q_TABLE_SIZE] =  { /* 3 for the first 32 entries, 2 for the remaining 32 */
-  3,   3,   3,   3,   3,   3,   3,   3,
-  3,   3,   3,   3,   3,   3,   3,   3,
-  3,   3,   3,   3,   3,   3,   3,   3,
-  3,   3,   3,   3,   3,   3,   3,   3,
-  2,   2,   2,   2,   2,   2,   2,   2,
-  2,   2,   2,   2,   2,   2,   2,   2,
-  2,   2,   2,   2,   2,   2,   2,   2,
-  2,   2,   2,   2,   2,   2,   2,   2 };

+ 0 - 409
Engine/lib/libtheora/lib/enc/x86_32/dct_decode_mmx.c

@@ -1,409 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: dct_decode_mmx.c 15400 2008-10-15 12:10:58Z tterribe $
-
- ********************************************************************/
-
-#include <stdlib.h>
-
-#include "../codec_internal.h"
-
-#if defined(USE_ASM)
-
-static const __attribute__((aligned(8),used)) ogg_int64_t OC_V3=
- 0x0003000300030003LL; /*Four 16-bit lanes of 3: the pmullw scale factor in the filters below.*/
-static const __attribute__((aligned(8),used)) ogg_int64_t OC_V4=
- 0x0004000400040004LL; /*Four 16-bit lanes of 4: the bias added before the psraw $3.*/
-/*MMX loop filter working on 8 adjacent pixel columns at once.
-  The routine steps _pix back by two rows, reads four consecutive rows of 8
-   bytes, computes the limited residual R_i per column and then adds it to
-   the second row and subtracts it from the third row (with unsigned
-   saturation); the first and fourth rows are only read.
-  _pix:     Start address; rows at -2, -1, 0 and +1 strides from here are used.
-  _ystride: Byte distance between consecutive rows.
-  _ll:      Four 16-bit copies of the filter limit L, loaded with one movq.
-  No emms is issued here; the MMX state is left live for the caller.*/
-static void loop_filter_v(unsigned char *_pix,int _ystride,
-                          const ogg_int16_t *_ll){
-  long esi;
-  _pix-=_ystride*2;
-  __asm__ __volatile__(
-    /*mm0=0*/
-    "pxor %%mm0,%%mm0\n\t"
-    /*esi=_ystride*3*/
-    "lea (%[ystride],%[ystride],2),%[s]\n\t"
-    /*mm7=_pix[0...8]*/
-    "movq (%[pix]),%%mm7\n\t"
-    /*mm4=_pix[0...8+_ystride*3]*/
-    "movq (%[pix],%[s]),%%mm4\n\t"
-    /*mm6=_pix[0...8]*/
-    "movq %%mm7,%%mm6\n\t"
-    /*Expand unsigned _pix[0...3] to 16 bits.*/
-    "punpcklbw %%mm0,%%mm6\n\t"
-    "movq %%mm4,%%mm5\n\t"
-    /*Expand unsigned _pix[4...8] to 16 bits.*/
-    "punpckhbw %%mm0,%%mm7\n\t"
-    /*Expand other arrays too.*/
-    "punpcklbw %%mm0,%%mm4\n\t"
-    "punpckhbw %%mm0,%%mm5\n\t"
-    /*mm7:mm6=_p[0...8]-_p[0...8+_ystride*3]:*/
-    "psubw %%mm4,%%mm6\n\t"
-    "psubw %%mm5,%%mm7\n\t"
-    /*mm5=mm4=_pix[0...8+_ystride]*/
-    "movq (%[pix],%[ystride]),%%mm4\n\t"
-    /*mm1=mm3=mm2=_pix[0...8+_ystride*2]*/
-    "movq (%[pix],%[ystride],2),%%mm2\n\t"
-    "movq %%mm4,%%mm5\n\t"
-    "movq %%mm2,%%mm3\n\t"
-    "movq %%mm2,%%mm1\n\t"
-    /*Expand these arrays.*/
-    "punpckhbw %%mm0,%%mm5\n\t"
-    "punpcklbw %%mm0,%%mm4\n\t"
-    "punpckhbw %%mm0,%%mm3\n\t"
-    "punpcklbw %%mm0,%%mm2\n\t"
-    /*Preload...*/
-    "movq %[OC_V3],%%mm0\n\t"
-    /*mm3:mm2=_pix[0...8+_ystride*2]-_pix[0...8+_ystride]*/
-    "psubw %%mm5,%%mm3\n\t"
-    "psubw %%mm4,%%mm2\n\t"
-    /*Scale by 3.*/
-    "pmullw %%mm0,%%mm3\n\t"
-    "pmullw %%mm0,%%mm2\n\t"
-    /*Preload...*/
-    "movq %[OC_V4],%%mm0\n\t"
-    /*f=mm3:mm2==_pix[0...8]-_pix[0...8+_ystride*3]+
-       3*(_pix[0...8+_ystride*2]-_pix[0...8+_ystride])*/
-    "paddw %%mm7,%%mm3\n\t"
-    "paddw %%mm6,%%mm2\n\t"
-    /*Add 4.*/
-    "paddw %%mm0,%%mm3\n\t"
-    "paddw %%mm0,%%mm2\n\t"
-    /*"Divide" by 8.*/
-    "psraw $3,%%mm3\n\t"
-    "psraw $3,%%mm2\n\t"
-    /*Now compute lflim of mm3:mm2 cf. Section 7.10 of the spec.*/
-    /*Free up mm5.*/
-    "packuswb %%mm5,%%mm4\n\t"
-    /*mm0=L L L L*/
-    "movq (%[ll]),%%mm0\n\t"
-    /*if(R_i<-2L||R_i>2L)R_i=0:*/
-    "movq %%mm2,%%mm5\n\t"
-    "pxor %%mm6,%%mm6\n\t"
-    "movq %%mm0,%%mm7\n\t"
-    "psubw %%mm0,%%mm6\n\t"
-    "psllw $1,%%mm7\n\t"
-    "psllw $1,%%mm6\n\t"
-    /*mm2==R_3 R_2 R_1 R_0*/
-    /*mm5==R_3 R_2 R_1 R_0*/
-    /*mm6==-2L -2L -2L -2L*/
-    /*mm7==2L 2L 2L 2L*/
-    "pcmpgtw %%mm2,%%mm7\n\t"
-    "pcmpgtw %%mm6,%%mm5\n\t"
-    "pand %%mm7,%%mm2\n\t"
-    "movq %%mm0,%%mm7\n\t"
-    "pand %%mm5,%%mm2\n\t"
-    "psllw $1,%%mm7\n\t"
-    "movq %%mm3,%%mm5\n\t"
-    /*mm3==R_7 R_6 R_5 R_4*/
-    /*mm5==R_7 R_6 R_5 R_4*/
-    /*mm6==-2L -2L -2L -2L*/
-    /*mm7==2L 2L 2L 2L*/
-    "pcmpgtw %%mm3,%%mm7\n\t"
-    "pcmpgtw %%mm6,%%mm5\n\t"
-    "pand %%mm7,%%mm3\n\t"
-    "movq %%mm0,%%mm7\n\t"
-    "pand %%mm5,%%mm3\n\t"
-    /*if(R_i<-L)R_i'=R_i+2L;
-      if(R_i>L)R_i'=R_i-2L;
-      if(R_i<-L||R_i>L)R_i=-R_i':*/
-    "psraw $1,%%mm6\n\t"
-    "movq %%mm2,%%mm5\n\t"
-    "psllw $1,%%mm7\n\t"
-    /*mm2==R_3 R_2 R_1 R_0*/
-    /*mm5==R_3 R_2 R_1 R_0*/
-    /*mm6==-L -L -L -L*/
-    /*mm0==L L L L*/
-    /*mm5=R_i>L?FF:00*/
-    "pcmpgtw %%mm0,%%mm5\n\t"
-    /*mm6=-L>R_i?FF:00*/
-    "pcmpgtw %%mm2,%%mm6\n\t"
-    /*mm7=R_i>L?2L:0*/
-    "pand %%mm5,%%mm7\n\t"
-    /*mm2=R_i>L?R_i-2L:R_i*/
-    "psubw %%mm7,%%mm2\n\t"
-    "movq %%mm0,%%mm7\n\t"
-    /*mm5=-L>R_i||R_i>L*/
-    "por %%mm6,%%mm5\n\t"
-    "psllw $1,%%mm7\n\t"
-    /*mm7=-L>R_i?2L:0*/
-    "pand %%mm6,%%mm7\n\t"
-    "pxor %%mm6,%%mm6\n\t"
-    /*mm2=-L>R_i?R_i+2L:R_i*/
-    "paddw %%mm7,%%mm2\n\t"
-    "psubw %%mm0,%%mm6\n\t"
-    /*mm5=-L>R_i||R_i>L?-R_i':0*/
-    "pand %%mm2,%%mm5\n\t"
-    "movq %%mm0,%%mm7\n\t"
-    /*mm2=-L>R_i||R_i>L?0:R_i*/
-    "psubw %%mm5,%%mm2\n\t"
-    "psllw $1,%%mm7\n\t"
-    /*mm2=-L>R_i||R_i>L?-R_i':R_i*/
-    "psubw %%mm5,%%mm2\n\t"
-    "movq %%mm3,%%mm5\n\t"
-    /*mm3==R_7 R_6 R_5 R_4*/
-    /*mm5==R_7 R_6 R_5 R_4*/
-    /*mm6==-L -L -L -L*/
-    /*mm0==L L L L*/
-    /*mm6=-L>R_i?FF:00*/
-    "pcmpgtw %%mm3,%%mm6\n\t"
-    /*mm5=R_i>L?FF:00*/
-    "pcmpgtw %%mm0,%%mm5\n\t"
-    /*mm7=R_i>L?2L:0*/
-    "pand %%mm5,%%mm7\n\t"
-    /*mm3=R_i>L?R_i-2L:R_i*/
-    "psubw %%mm7,%%mm3\n\t"
-    "psllw $1,%%mm0\n\t"
-    /*mm5=-L>R_i||R_i>L*/
-    "por %%mm6,%%mm5\n\t"
-    /*mm0=-L>R_i?2L:0*/
-    "pand %%mm6,%%mm0\n\t"
-    /*mm3=-L>R_i?R_i+2L:R_i*/
-    "paddw %%mm0,%%mm3\n\t"
-    /*mm5=-L>R_i||R_i>L?-R_i':0*/
-    "pand %%mm3,%%mm5\n\t"
-    /*mm3=-L>R_i||R_i>L?0:R_i*/
-    "psubw %%mm5,%%mm3\n\t"
-    /*mm3=-L>R_i||R_i>L?-R_i':R_i*/
-    "psubw %%mm5,%%mm3\n\t"
-    /*Unfortunately, there's no unsigned byte+signed byte with unsigned
-       saturation op code, so we have to promote things back 16 bits.*/
-    "pxor %%mm0,%%mm0\n\t"
-    "movq %%mm4,%%mm5\n\t"
-    "punpcklbw %%mm0,%%mm4\n\t"
-    "punpckhbw %%mm0,%%mm5\n\t"
-    "movq %%mm1,%%mm6\n\t"
-    "punpcklbw %%mm0,%%mm1\n\t"
-    "punpckhbw %%mm0,%%mm6\n\t"
-    /*_pix[0...8+_ystride]+=R_i*/
-    "paddw %%mm2,%%mm4\n\t"
-    "paddw %%mm3,%%mm5\n\t"
-    /*_pix[0...8+_ystride*2]-=R_i*/
-    "psubw %%mm2,%%mm1\n\t"
-    "psubw %%mm3,%%mm6\n\t"
-    "packuswb %%mm5,%%mm4\n\t"
-    "packuswb %%mm6,%%mm1\n\t"
-    /*Write it back out.*/
-    "movq %%mm4,(%[pix],%[ystride])\n\t"
-    "movq %%mm1,(%[pix],%[ystride],2)\n\t"
-    :[s]"=&S"(esi)
-    :[pix]"r"(_pix),[ystride]"r"((long)_ystride),[ll]"r"(_ll),
-     [OC_V3]"m"(OC_V3),[OC_V4]"m"(OC_V4)
-    :"memory"
-  );
-}
-
-/*This code implements the bulk of loop_filter_h().
-  Data are striped p0 p1 p2 p3 ... p0 p1 p2 p3 ..., so in order to load all
-   four p0's to one register we must transpose the values in four mmx regs.
-  When half is done we repeat this for the rest.
-  Each call handles four rows: it reads 4 bytes from each row starting at
-   _pix, and writes back 2 bytes per row at byte offset 1 (the two middle
-   pixels of the four).
-  _pix:     First of the four pixels in the first row.
-  _ystride: Byte distance between consecutive rows.
-  _ll:      Four 16-bit copies of the filter limit L, loaded with one movq.
-  No emms is issued here; the MMX state is left live for the caller.*/
-static void loop_filter_h4(unsigned char *_pix,long _ystride,
-                           const ogg_int16_t *_ll){
-  long esi;
-  long edi;
-  __asm__ __volatile__(
-    /*x x x x 3 2 1 0*/
-    "movd (%[pix]),%%mm0\n\t"
-    /*esi=_ystride*3*/
-    "lea (%[ystride],%[ystride],2),%[s]\n\t"
-    /*x x x x 7 6 5 4*/
-    "movd (%[pix],%[ystride]),%%mm1\n\t"
-    /*x x x x B A 9 8*/
-    "movd (%[pix],%[ystride],2),%%mm2\n\t"
-    /*x x x x F E D C*/
-    "movd (%[pix],%[s]),%%mm3\n\t"
-    /*mm0=7 3 6 2 5 1 4 0*/
-    "punpcklbw %%mm1,%%mm0\n\t"
-    /*mm2=F B E A D 9 C 8*/
-    "punpcklbw %%mm3,%%mm2\n\t"
-    /*mm1=7 3 6 2 5 1 4 0*/
-    "movq %%mm0,%%mm1\n\t"
-    /*mm0=F B 7 3 E A 6 2*/
-    "punpckhwd %%mm2,%%mm0\n\t"
-    /*mm1=D 9 5 1 C 8 4 0*/
-    "punpcklwd %%mm2,%%mm1\n\t"
-    "pxor %%mm7,%%mm7\n\t"
-    /*mm5=D 9 5 1 C 8 4 0*/
-    "movq %%mm1,%%mm5\n\t"
-    /*mm1=x C x 8 x 4 x 0==pix[0]*/
-    "punpcklbw %%mm7,%%mm1\n\t"
-    /*mm5=x D x 9 x 5 x 1==pix[1]*/
-    "punpckhbw %%mm7,%%mm5\n\t"
-    /*mm3=F B 7 3 E A 6 2*/
-    "movq %%mm0,%%mm3\n\t"
-    /*mm0=x E x A x 6 x 2==pix[2]*/
-    "punpcklbw %%mm7,%%mm0\n\t"
-    /*mm3=x F x B x 7 x 3==pix[3]*/
-    "punpckhbw %%mm7,%%mm3\n\t"
-    /*mm1=mm1-mm3==pix[0]-pix[3]*/
-    "psubw %%mm3,%%mm1\n\t"
-    /*Save a copy of pix[2] for later.*/
-    "movq %%mm0,%%mm4\n\t"
-    /*mm0=mm0-mm5==pix[2]-pix[1]*/
-    "psubw %%mm5,%%mm0\n\t"
-    /*Scale by 3.*/
-    "pmullw %[OC_V3],%%mm0\n\t"
-    /*f=mm0==_pix[0]-_pix[3]+ 3*(_pix[2]-_pix[1])*/
-    "paddw %%mm1,%%mm0\n\t"
-    /*Add 4.*/
-    "paddw %[OC_V4],%%mm0\n\t"
-    /*"Divide" by 8, producing the residuals R_i.*/
-    "psraw $3,%%mm0\n\t"
-    /*Now compute lflim of mm0 cf. Section 7.10 of the spec.*/
-    /*mm6=L L L L*/
-    "movq (%[ll]),%%mm6\n\t"
-    /*if(R_i<-2L||R_i>2L)R_i=0:*/
-    "movq %%mm0,%%mm1\n\t"
-    "pxor %%mm2,%%mm2\n\t"
-    "movq %%mm6,%%mm3\n\t"
-    "psubw %%mm6,%%mm2\n\t"
-    "psllw $1,%%mm3\n\t"
-    "psllw $1,%%mm2\n\t"
-    /*mm0==R_3 R_2 R_1 R_0*/
-    /*mm1==R_3 R_2 R_1 R_0*/
-    /*mm2==-2L -2L -2L -2L*/
-    /*mm3==2L 2L 2L 2L*/
-    "pcmpgtw %%mm0,%%mm3\n\t"
-    "pcmpgtw %%mm2,%%mm1\n\t"
-    "pand %%mm3,%%mm0\n\t"
-    "pand %%mm1,%%mm0\n\t"
-    /*if(R_i<-L)R_i'=R_i+2L;
-      if(R_i>L)R_i'=R_i-2L;
-      if(R_i<-L||R_i>L)R_i=-R_i':*/
-    "psraw $1,%%mm2\n\t"
-    "movq %%mm0,%%mm1\n\t"
-    "movq %%mm6,%%mm3\n\t"
-    /*mm0==R_3 R_2 R_1 R_0*/
-    /*mm1==R_3 R_2 R_1 R_0*/
-    /*mm2==-L -L -L -L*/
-    /*mm6==L L L L*/
-    /*mm2=-L>R_i?FF:00*/
-    "pcmpgtw %%mm0,%%mm2\n\t"
-    /*mm1=R_i>L?FF:00*/
-    "pcmpgtw %%mm6,%%mm1\n\t"
-    /*mm3=2L 2L 2L 2L*/
-    "psllw $1,%%mm3\n\t"
-    /*mm6=2L 2L 2L 2L*/
-    "psllw $1,%%mm6\n\t"
-    /*mm3=R_i>L?2L:0*/
-    "pand %%mm1,%%mm3\n\t"
-    /*mm6=-L>R_i?2L:0*/
-    "pand %%mm2,%%mm6\n\t"
-    /*mm0=R_i>L?R_i-2L:R_i*/
-    "psubw %%mm3,%%mm0\n\t"
-    /*mm1=-L>R_i||R_i>L*/
-    "por %%mm2,%%mm1\n\t"
-    /*mm0=-L>R_i?R_i+2L:R_i*/
-    "paddw %%mm6,%%mm0\n\t"
-    /*mm1=-L>R_i||R_i>L?R_i':0*/
-    "pand %%mm0,%%mm1\n\t"
-    /*mm0=-L>R_i||R_i>L?0:R_i*/
-    "psubw %%mm1,%%mm0\n\t"
-    /*mm0=-L>R_i||R_i>L?-R_i':R_i*/
-    "psubw %%mm1,%%mm0\n\t"
-    /*_pix[1]+=R_i;*/
-    "paddw %%mm0,%%mm5\n\t"
-    /*_pix[2]-=R_i;*/
-    "psubw %%mm0,%%mm4\n\t"
-    /*mm5=x x x x D 9 5 1*/
-    "packuswb %%mm7,%%mm5\n\t"
-    /*mm4=x x x x E A 6 2*/
-    "packuswb %%mm7,%%mm4\n\t"
-    /*mm5=E D A 9 6 5 2 1*/
-    "punpcklbw %%mm4,%%mm5\n\t"
-    /*edi=6 5 2 1*/
-    "movd %%mm5,%%edi\n\t"
-    "movw %%di,1(%[pix])\n\t"
-    /*Why is there such a big stall here?*/
-    "psrlq $32,%%mm5\n\t"
-    "shrl $16,%%edi\n\t"
-    "movw %%di,1(%[pix],%[ystride])\n\t"
-    /*edi=E D A 9*/
-    "movd %%mm5,%%edi\n\t"
-    "movw %%di,1(%[pix],%[ystride],2)\n\t"
-    "shrl $16,%%edi\n\t"
-    "movw %%di,1(%[pix],%[s])\n\t"
-    /*The asm names %edi directly, so the "D" constraint on [d] is what
-       reserves that register for it.*/
-    :[s]"=&S"(esi),[d]"=&D"(edi),
-     [pix]"+r"(_pix),[ystride]"+r"(_ystride),[ll]"+r"(_ll)
-    :[OC_V3]"m"(OC_V3),[OC_V4]"m"(OC_V4)
-    :"memory"
-  );
-}
-/*Filters eight rows by calling loop_filter_h4() twice: once for rows 0-3 and
-   once for rows 4-7, after stepping _pix back by two pixels.
-  _pix:     Start address; the four pixels filtered per row begin at _pix-2.
-  _ystride: Byte distance between consecutive rows.
-  _ll:      Filter limit vector, passed through to loop_filter_h4().*/
-static void loop_filter_h(unsigned char *_pix,int _ystride,
-                          const ogg_int16_t *_ll){
-  _pix-=2;
-  loop_filter_h4(_pix,_ystride,_ll);
-  loop_filter_h4(_pix+(_ystride<<2),_ystride,_ll);
-}
-/* Apply the MMX loop filter to pbi->LastFrameRecon, one plane at a time.
-   pbi:    decoder instance supplying the fragment flags, pixel index table,
-           plane sizes and strides.
-   FLimit: filter limit; 0 disables filtering entirely.
-   For every fragment whose display_fragments flag is non-zero, the left and
-   top edges are filtered unless the fragment is in the first column / first
-   row of its plane, and the right and bottom edges are filtered only when
-   the neighbouring fragment's flag is zero.  emms is issued once at the end. */
-static void loop_filter_mmx(PB_INSTANCE *pbi, int FLimit){
-  int j;
-  ogg_int16_t __attribute__((aligned(8)))  ll[4];
-  unsigned char *cp = pbi->display_fragments;
-  ogg_uint32_t *bp = pbi->recon_pixel_index_table;
-
-  if ( FLimit == 0 ) return;
-  /* Replicate the limit into four 16-bit lanes for the asm routines. */
-  ll[0]=ll[1]=ll[2]=ll[3]=FLimit;
-
-  /* cp and bp keep advancing across the three planes; only the per-plane
-     bounds, row length and stride are reset on each pass. */
-  for ( j = 0; j < 3 ; j++){
-    ogg_uint32_t *bp_begin = bp;
-    ogg_uint32_t *bp_end;
-    int stride;
-    int h;
-
-    switch(j) {
-    case 0: /* y */
-      bp_end = bp + pbi->YPlaneFragments;
-      h = pbi->HFragments;
-      stride = pbi->YStride;
-      break;
-    default: /* u,v, 4:2:0 specific */
-      bp_end = bp + pbi->UVPlaneFragments;
-      h = pbi->HFragments >> 1;
-      stride = pbi->UVStride;
-      break;
-    }
-
-    while(bp<bp_end){
-      /* bp_left / bp_right bound the current row of h fragments. */
-      ogg_uint32_t *bp_left = bp;
-      ogg_uint32_t *bp_right = bp + h;
-      while(bp<bp_right){
-        if(cp[0]){
-          /* Left edge: skipped for the first fragment in the row. */
-          if(bp>bp_left)
-            loop_filter_h(&pbi->LastFrameRecon[bp[0]],stride,ll);
-          /* Top edge: skipped for the first row of the plane. */
-          if(bp_left>bp_begin)
-            loop_filter_v(&pbi->LastFrameRecon[bp[0]],stride,ll);
-          /* Right edge: only when the right neighbour's flag is zero. */
-          if(bp+1<bp_right && !cp[1])
-            loop_filter_h(&pbi->LastFrameRecon[bp[0]]+8,stride,ll);
-          /* Bottom edge: only when the flag of the fragment below is zero. */
-          if(bp+h<bp_end && !cp[h])
-            loop_filter_v(&pbi->LastFrameRecon[bp[h]],stride,ll);
-        }
-        bp++;
-        cp++;
-      }
-    }
-  }
-
-  /* Clear the MMX state left behind by the filter routines. */
-  __asm__ __volatile__("emms\n\t");
-}
-
-/* Install our implementation in the function table: points
-   funcs->LoopFilter at loop_filter_mmx.  No other entry is touched. */
-void dsp_mmx_dct_decode_init(DspFunctions *funcs)
-{
-  funcs->LoopFilter = loop_filter_mmx;
-}
-
-#endif /* USE_ASM */

+ 0 - 666
Engine/lib/libtheora/lib/enc/x86_32/dsp_mmx.c

@@ -1,666 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: dsp_mmx.c 15153 2008-08-04 18:37:55Z tterribe $
-
- ********************************************************************/
-
-#include <stdlib.h>
-
-#include "../codec_internal.h"
-#include "../dsp.h"
-
-#if defined(USE_ASM)
-
-static const __attribute__ ((aligned(8),used)) ogg_int64_t V128 = 0x0080008000800080LL;
-
-#define DSP_OP_AVG(a,b) ((((int)(a)) + ((int)(b)))/2)
-#define DSP_OP_DIFF(a,b) (((int)(a)) - ((int)(b)))
-#define DSP_OP_ABS_DIFF(a,b) abs((((int)(a)) - ((int)(b))))
-
-#define SUB_LOOP                                                              \
-  "  movq        (%0), %%mm0      \n\t" /* mm0 = FiltPtr */                   \
-  "  movq        (%1), %%mm1      \n\t" /* mm1 = ReconPtr */                  \
-  "  movq        %%mm0, %%mm2     \n\t" /* dup to prepare for up conversion */\
-  "  movq        %%mm1, %%mm3     \n\t" /* dup to prepare for up conversion */\
-  /* convert from UINT8 to INT16 */                                           \
-  "  punpcklbw   %%mm7, %%mm0     \n\t" /* mm0 = INT16(FiltPtr) */            \
-  "  punpcklbw   %%mm7, %%mm1     \n\t" /* mm1 = INT16(ReconPtr) */           \
-  "  punpckhbw   %%mm7, %%mm2     \n\t" /* mm2 = INT16(FiltPtr) */            \
-  "  punpckhbw   %%mm7, %%mm3     \n\t" /* mm3 = INT16(ReconPtr) */           \
-  /* start calculation */                                                     \
-  "  psubw       %%mm1, %%mm0     \n\t" /* mm0 = FiltPtr - ReconPtr */        \
-  "  psubw       %%mm3, %%mm2     \n\t" /* mm2 = FiltPtr - ReconPtr */        \
-  "  movq        %%mm0,  (%2)     \n\t" /* write answer out */                \
-  "  movq        %%mm2, 8(%2)     \n\t" /* write answer out */                \
-  /* Increment pointers */                                                    \
-  "  add         $16, %2           \n\t"                                      \
-  "  add         %3, %0           \n\t"                                       \
-  "  add         %4, %1           \n\t"
-
-static void sub8x8__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr,
-                  ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine,
-                  ogg_uint32_t ReconPixelsPerLine)
-{
-  __asm__ __volatile__ (
-    "  .p2align 4                   \n\t"
-
-    "  pxor        %%mm7, %%mm7     \n\t"
-    SUB_LOOP
-    SUB_LOOP
-    SUB_LOOP
-    SUB_LOOP
-    SUB_LOOP
-    SUB_LOOP
-    SUB_LOOP
-    SUB_LOOP
-     : "+r" (FiltPtr),
-       "+r" (ReconPtr),
-       "+r" (DctInputPtr)
-     : "m" (PixelsPerLine),
-       "m" (ReconPixelsPerLine)
-     : "memory"
-  );
-}
-
-#define SUB_128_LOOP                                                          \
-  "  movq        (%0), %%mm0      \n\t" /* mm0 = FiltPtr */                   \
-  "  movq        %%mm0, %%mm2     \n\t" /* dup to prepare for up conversion */\
-  /* convert from UINT8 to INT16 */                                           \
-  "  punpcklbw   %%mm7, %%mm0     \n\t" /* mm0 = INT16(FiltPtr) */            \
-  "  punpckhbw   %%mm7, %%mm2     \n\t" /* mm2 = INT16(FiltPtr) */            \
-  /* start calculation */                                                     \
-  "  psubw       %%mm1, %%mm0     \n\t" /* mm0 = FiltPtr - 128 */             \
-  "  psubw       %%mm1, %%mm2     \n\t" /* mm2 = FiltPtr - 128 */             \
-  "  movq        %%mm0,  (%1)     \n\t" /* write answer out */                \
-  "  movq        %%mm2, 8(%1)     \n\t" /* write answer out */                \
-  /* Increment pointers */                                                    \
-  "  add         $16, %1           \n\t"                                      \
-  "  add         %2, %0           \n\t"
-
-
-static void sub8x8_128__mmx (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr,
-                      ogg_uint32_t PixelsPerLine)
-{
-  __asm__ __volatile__ (
-    "  .p2align 4                   \n\t"
-
-    "  pxor        %%mm7, %%mm7     \n\t"
-    "  movq        %[V128], %%mm1   \n\t"
-    SUB_128_LOOP
-    SUB_128_LOOP
-    SUB_128_LOOP
-    SUB_128_LOOP
-    SUB_128_LOOP
-    SUB_128_LOOP
-    SUB_128_LOOP
-    SUB_128_LOOP
-     : "+r" (FiltPtr),
-       "+r" (DctInputPtr)
-     : "m" (PixelsPerLine),
-       [V128] "m" (V128)
-     : "memory"
-  );
-}
-
-#define SUB_AVG2_LOOP                                                         \
-  "  movq        (%0), %%mm0      \n\t" /* mm0 = FiltPtr */                   \
-  "  movq        (%1), %%mm1      \n\t" /* mm1 = ReconPtr1 */                 \
-  "  movq        (%2), %%mm4      \n\t" /* mm1 = ReconPtr2 */                 \
-  "  movq        %%mm0, %%mm2     \n\t" /* dup to prepare for up conversion */\
-  "  movq        %%mm1, %%mm3     \n\t" /* dup to prepare for up conversion */\
-  "  movq        %%mm4, %%mm5     \n\t" /* dup to prepare for up conversion */\
-  /* convert from UINT8 to INT16 */                                           \
-  "  punpcklbw   %%mm7, %%mm0     \n\t" /* mm0 = INT16(FiltPtr) */            \
-  "  punpcklbw   %%mm7, %%mm1     \n\t" /* mm1 = INT16(ReconPtr1) */          \
-  "  punpcklbw   %%mm7, %%mm4     \n\t" /* mm1 = INT16(ReconPtr2) */          \
-  "  punpckhbw   %%mm7, %%mm2     \n\t" /* mm2 = INT16(FiltPtr) */            \
-  "  punpckhbw   %%mm7, %%mm3     \n\t" /* mm3 = INT16(ReconPtr1) */          \
-  "  punpckhbw   %%mm7, %%mm5     \n\t" /* mm3 = INT16(ReconPtr2) */          \
-  /* average ReconPtr1 and ReconPtr2 */                                       \
-  "  paddw       %%mm4, %%mm1     \n\t" /* mm1 = ReconPtr1 + ReconPtr2 */     \
-  "  paddw       %%mm5, %%mm3     \n\t" /* mm3 = ReconPtr1 + ReconPtr2 */     \
-  "  psrlw       $1, %%mm1        \n\t" /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */ \
-  "  psrlw       $1, %%mm3        \n\t" /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */ \
-  "  psubw       %%mm1, %%mm0     \n\t" /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ \
-  "  psubw       %%mm3, %%mm2     \n\t" /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ \
-  "  movq        %%mm0,  (%3)     \n\t" /* write answer out */                \
-  "  movq        %%mm2, 8(%3)     \n\t" /* write answer out */                \
-  /* Increment pointers */                                                    \
-  "  add         $16, %3           \n\t"                                      \
-  "  add         %4, %0           \n\t"                                       \
-  "  add         %5, %1           \n\t"                                       \
-  "  add         %5, %2           \n\t"
-
-
-static void sub8x8avg2__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr1,
-                     unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr,
-                     ogg_uint32_t PixelsPerLine,
-                     ogg_uint32_t ReconPixelsPerLine)
-{
-  __asm__ __volatile__ (
-    "  .p2align 4                   \n\t"
-
-    "  pxor        %%mm7, %%mm7     \n\t"
-    SUB_AVG2_LOOP
-    SUB_AVG2_LOOP
-    SUB_AVG2_LOOP
-    SUB_AVG2_LOOP
-    SUB_AVG2_LOOP
-    SUB_AVG2_LOOP
-    SUB_AVG2_LOOP
-    SUB_AVG2_LOOP
-     : "+r" (FiltPtr),
-       "+r" (ReconPtr1),
-       "+r" (ReconPtr2),
-       "+r" (DctInputPtr)
-     : "m" (PixelsPerLine),
-       "m" (ReconPixelsPerLine)
-     : "memory"
-  );
-}
-
-static ogg_uint32_t row_sad8__mmx (unsigned char *Src1, unsigned char *Src2)
-{
-  ogg_uint32_t MaxSad;
-
-  __asm__ __volatile__ (
-    "  .p2align 4                   \n\t"
-
-    "  pxor        %%mm6, %%mm6     \n\t"       /* zero out mm6 for unpack */
-    "  pxor        %%mm7, %%mm7     \n\t"       /* zero out mm7 for unpack */
-    "  movq        (%1), %%mm0      \n\t"       /* take 8 bytes */
-    "  movq        (%2), %%mm1      \n\t"
-
-    "  movq        %%mm0, %%mm2     \n\t"
-    "  psubusb     %%mm1, %%mm0     \n\t"       /* A - B */
-    "  psubusb     %%mm2, %%mm1     \n\t"       /* B - A */
-    "  por         %%mm1, %%mm0     \n\t"       /* and or gives abs difference */
-
-    "  movq        %%mm0, %%mm1     \n\t"
-
-    "  punpcklbw   %%mm6, %%mm0     \n\t"       /* ; unpack low four bytes to higher precision */
-    "  punpckhbw   %%mm7, %%mm1     \n\t"       /* ; unpack high four bytes to higher precision */
-
-    "  movq        %%mm0, %%mm2     \n\t"
-    "  movq        %%mm1, %%mm3     \n\t"
-    "  psrlq       $32, %%mm2       \n\t"       /* fold and add */
-    "  psrlq       $32, %%mm3       \n\t"
-    "  paddw       %%mm2, %%mm0     \n\t"
-    "  paddw       %%mm3, %%mm1     \n\t"
-    "  movq        %%mm0, %%mm2     \n\t"
-    "  movq        %%mm1, %%mm3     \n\t"
-    "  psrlq       $16, %%mm2       \n\t"
-    "  psrlq       $16, %%mm3       \n\t"
-    "  paddw       %%mm2, %%mm0     \n\t"
-    "  paddw       %%mm3, %%mm1     \n\t"
-
-    "  psubusw     %%mm0, %%mm1     \n\t"
-    "  paddw       %%mm0, %%mm1     \n\t"       /* mm1 = max(mm1, mm0) */
-    "  movd        %%mm1, %0        \n\t"
-    "  andl        $0xffff, %0      \n\t"
-
-     : "=m" (MaxSad),
-       "+r" (Src1),
-       "+r" (Src2)
-     :
-     : "memory"
-  );
-  return MaxSad;
-}
-
-static ogg_uint32_t col_sad8x8__mmx (unsigned char *Src1, unsigned char *Src2,
-                                    ogg_uint32_t stride)
-{
-  ogg_uint32_t MaxSad;
-
-  __asm__ __volatile__ (
-    "  .p2align 4                   \n\t"
-
-    "  pxor        %%mm3, %%mm3     \n\t"       /* zero out mm3 for unpack */
-    "  pxor        %%mm4, %%mm4     \n\t"       /* mm4 low sum */
-    "  pxor        %%mm5, %%mm5     \n\t"       /* mm5 high sum */
-    "  pxor        %%mm6, %%mm6     \n\t"       /* mm6 low sum */
-    "  pxor        %%mm7, %%mm7     \n\t"       /* mm7 high sum */
-    "  mov         $4, %%edi        \n\t"       /* 4 rows */
-    "1:                             \n\t"
-    "  movq        (%1), %%mm0      \n\t"       /* take 8 bytes */
-    "  movq        (%2), %%mm1      \n\t"       /* take 8 bytes */
-
-    "  movq        %%mm0, %%mm2     \n\t"
-    "  psubusb     %%mm1, %%mm0     \n\t"       /* A - B */
-    "  psubusb     %%mm2, %%mm1     \n\t"       /* B - A */
-    "  por         %%mm1, %%mm0     \n\t"       /* and or gives abs difference */
-    "  movq        %%mm0, %%mm1     \n\t"
-
-    "  punpcklbw   %%mm3, %%mm0     \n\t"       /* unpack to higher precision for accumulation */
-    "  paddw       %%mm0, %%mm4     \n\t"       /* accumulate difference... */
-    "  punpckhbw   %%mm3, %%mm1     \n\t"       /* unpack high four bytes to higher precision */
-    "  paddw       %%mm1, %%mm5     \n\t"       /* accumulate difference... */
-    "  add         %3, %1           \n\t"       /* Inc pointer into the new data */
-    "  add         %3, %2           \n\t"       /* Inc pointer into the new data */
-
-    "  dec         %%edi            \n\t"
-    "  jnz 1b                       \n\t"
-
-    "  mov         $4, %%edi        \n\t"       /* 4 rows */
-    "2:                             \n\t"
-    "  movq        (%1), %%mm0      \n\t"       /* take 8 bytes */
-    "  movq        (%2), %%mm1      \n\t"       /* take 8 bytes */
-
-    "  movq        %%mm0, %%mm2     \n\t"
-    "  psubusb     %%mm1, %%mm0     \n\t"       /* A - B */
-    "  psubusb     %%mm2, %%mm1     \n\t"       /* B - A */
-    "  por         %%mm1, %%mm0     \n\t"       /* and or gives abs difference */
-    "  movq        %%mm0, %%mm1     \n\t"
-
-    "  punpcklbw   %%mm3, %%mm0     \n\t"       /* unpack to higher precision for accumulation */
-    "  paddw       %%mm0, %%mm6     \n\t"       /* accumulate difference... */
-    "  punpckhbw   %%mm3, %%mm1     \n\t"       /* unpack high four bytes to higher precision */
-    "  paddw       %%mm1, %%mm7     \n\t"       /* accumulate difference... */
-    "  add         %3, %1           \n\t"       /* Inc pointer into the new data */
-    "  add         %3, %2           \n\t"       /* Inc pointer into the new data */
-
-    "  dec         %%edi            \n\t"
-    "  jnz 2b                       \n\t"
-
-    "  psubusw     %%mm6, %%mm7     \n\t"
-    "  paddw       %%mm6, %%mm7     \n\t"       /* mm7 = max(mm7, mm6) */
-    "  psubusw     %%mm4, %%mm5     \n\t"
-    "  paddw       %%mm4, %%mm5     \n\t"       /* mm5 = max(mm5, mm4) */
-    "  psubusw     %%mm5, %%mm7     \n\t"
-    "  paddw       %%mm5, %%mm7     \n\t"       /* mm7 = max(mm5, mm7) */
-    "  movq        %%mm7, %%mm6     \n\t"
-    "  psrlq       $32, %%mm6       \n\t"
-    "  psubusw     %%mm6, %%mm7     \n\t"
-    "  paddw       %%mm6, %%mm7     \n\t"       /* mm7 = max(mm5, mm7) */
-    "  movq        %%mm7, %%mm6     \n\t"
-    "  psrlq       $16, %%mm6       \n\t"
-    "  psubusw     %%mm6, %%mm7     \n\t"
-    "  paddw       %%mm6, %%mm7     \n\t"       /* mm7 = max(mm5, mm7) */
-    "  movd        %%mm7, %0        \n\t"
-    "  andl        $0xffff, %0      \n\t"
-
-     : "=r" (MaxSad),
-       "+r" (Src1),
-       "+r" (Src2)
-     : "r" (stride)
-     : "memory", "edi"
-  );
-
-  return MaxSad;
-}
-
-#define SAD_LOOP                                                              \
-  "  movq        (%1), %%mm0      \n\t" /* take 8 bytes */                    \
-  "  movq        (%2), %%mm1      \n\t"                                       \
-  "  movq        %%mm0, %%mm2     \n\t"                                       \
-  "  psubusb     %%mm1, %%mm0     \n\t"         /* A - B */                         \
-  "  psubusb     %%mm2, %%mm1     \n\t" /* B - A */                           \
-  "  por         %%mm1, %%mm0     \n\t" /* and or gives abs difference */     \
-  "  movq        %%mm0, %%mm1     \n\t"                                       \
-  "  punpcklbw   %%mm6, %%mm0     \n\t" /* unpack to higher precision for accumulation */ \
-  "  paddw       %%mm0, %%mm7     \n\t" /* accumulate difference... */        \
-  "  punpckhbw   %%mm6, %%mm1     \n\t" /* unpack high four bytes to higher precision */ \
-  "  add         %3, %1           \n\t" /* Inc pointer into the new data */   \
-  "  paddw       %%mm1, %%mm7     \n\t" /* accumulate difference... */        \
-  "  add         %4, %2           \n\t" /* Inc pointer into ref data */
-
-static ogg_uint32_t sad8x8__mmx (unsigned char *ptr1, ogg_uint32_t stride1,
-                            unsigned char *ptr2, ogg_uint32_t stride2)
-{
-  ogg_uint32_t  DiffVal;
-
-  __asm__ __volatile__ (
-    "  .p2align 4                   \n\t"
-    "  pxor        %%mm6, %%mm6     \n\t"       /* zero out mm6 for unpack */
-    "  pxor        %%mm7, %%mm7     \n\t"       /* mm7 contains the result */
-    SAD_LOOP
-    SAD_LOOP
-    SAD_LOOP
-    SAD_LOOP
-    SAD_LOOP
-    SAD_LOOP
-    SAD_LOOP
-    SAD_LOOP
-    "  movq        %%mm7, %%mm0     \n\t"
-    "  psrlq       $32, %%mm7       \n\t"
-    "  paddw       %%mm0, %%mm7     \n\t"
-    "  movq        %%mm7, %%mm0     \n\t"
-    "  psrlq       $16, %%mm7       \n\t"
-    "  paddw       %%mm0, %%mm7     \n\t"
-    "  movd        %%mm7, %0        \n\t"
-    "  andl        $0xffff, %0      \n\t"
-
-     : "=m" (DiffVal),
-       "+r" (ptr1),
-       "+r" (ptr2)
-     : "r" (stride1),
-       "r" (stride2)
-     : "memory"
-  );
-
-  return DiffVal;
-}
-
-static ogg_uint32_t sad8x8_thres__mmx (unsigned char *ptr1, ogg_uint32_t stride1,
-                                  unsigned char *ptr2, ogg_uint32_t stride2,
-                                  ogg_uint32_t thres)
-{
-  return sad8x8__mmx (ptr1, stride1, ptr2, stride2);
-}
-
-static ogg_uint32_t sad8x8_xy2_thres__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
-                                      unsigned char *RefDataPtr1,
-                                      unsigned char *RefDataPtr2, ogg_uint32_t RefStride,
-                                      ogg_uint32_t thres)
-{
-  ogg_uint32_t  DiffVal;
-
-  __asm__ __volatile__ (
-    "  .p2align 4                   \n\t"
-
-    "  pcmpeqd     %%mm5, %%mm5     \n\t"       /* fefefefefefefefe in mm5 */
-    "  paddb       %%mm5, %%mm5     \n\t"
-
-    "  pxor        %%mm6, %%mm6     \n\t"       /* zero out mm6 for unpack */
-    "  pxor        %%mm7, %%mm7     \n\t"       /* mm7 contains the result */
-    "  mov         $8, %%edi        \n\t"       /* 8 rows */
-    "1:                             \n\t"
-    "  movq        (%1), %%mm0      \n\t"       /* take 8 bytes */
-
-    "  movq        (%2), %%mm2      \n\t"
-    "  movq        (%3), %%mm3      \n\t"       /* take average of mm2 and mm3 */
-    "  movq        %%mm2, %%mm1     \n\t"
-    "  pand        %%mm3, %%mm1     \n\t"
-    "  pxor        %%mm2, %%mm3     \n\t"
-    "  pand        %%mm5, %%mm3     \n\t"
-    "  psrlq       $1, %%mm3        \n\t"
-    "  paddb       %%mm3, %%mm1     \n\t"
-
-    "  movq        %%mm0, %%mm2     \n\t"
-
-    "  psubusb     %%mm1, %%mm0     \n\t"       /* A - B */
-    "  psubusb     %%mm2, %%mm1     \n\t"       /* B - A */
-    "  por         %%mm1, %%mm0     \n\t"       /* and or gives abs difference */
-    "  movq        %%mm0, %%mm1     \n\t"
-
-    "  punpcklbw   %%mm6, %%mm0     \n\t"       /* unpack to higher precision for accumulation */
-    "  paddw       %%mm0, %%mm7     \n\t"       /* accumulate difference... */
-    "  punpckhbw   %%mm6, %%mm1     \n\t"       /* unpack high four bytes to higher precision */
-    "  add         %4, %1           \n\t"       /* Inc pointer into the new data */
-    "  paddw       %%mm1, %%mm7     \n\t"       /* accumulate difference... */
-    "  add         %5, %2           \n\t"       /* Inc pointer into ref data */
-    "  add         %5, %3           \n\t"       /* Inc pointer into ref data */
-
-    "  dec         %%edi            \n\t"
-    "  jnz 1b                       \n\t"
-
-    "  movq        %%mm7, %%mm0     \n\t"
-    "  psrlq       $32, %%mm7       \n\t"
-    "  paddw       %%mm0, %%mm7     \n\t"
-    "  movq        %%mm7, %%mm0     \n\t"
-    "  psrlq       $16, %%mm7       \n\t"
-    "  paddw       %%mm0, %%mm7     \n\t"
-    "  movd        %%mm7, %0        \n\t"
-    "  andl        $0xffff, %0      \n\t"
-
-     : "=m" (DiffVal),
-       "+r" (SrcData),
-       "+r" (RefDataPtr1),
-       "+r" (RefDataPtr2)
-     : "m" (SrcStride),
-       "m" (RefStride)
-     : "edi", "memory"
-  );
-
-  return DiffVal;
-}
-
-static ogg_uint32_t intra8x8_err__mmx (unsigned char *DataPtr, ogg_uint32_t Stride)
-{
-  ogg_uint32_t  XSum;
-  ogg_uint32_t  XXSum;
-
-  __asm__ __volatile__ (
-    "  .p2align 4                   \n\t"
-
-    "  pxor        %%mm5, %%mm5     \n\t"
-    "  pxor        %%mm6, %%mm6     \n\t"
-    "  pxor        %%mm7, %%mm7     \n\t"
-    "  mov         $8, %%edi        \n\t"
-    "1:                             \n\t"
-    "  movq        (%2), %%mm0      \n\t"       /* take 8 bytes */
-    "  movq        %%mm0, %%mm2     \n\t"
-
-    "  punpcklbw   %%mm6, %%mm0     \n\t"
-    "  punpckhbw   %%mm6, %%mm2     \n\t"
-
-    "  paddw       %%mm0, %%mm5     \n\t"
-    "  paddw       %%mm2, %%mm5     \n\t"
-
-    "  pmaddwd     %%mm0, %%mm0     \n\t"
-    "  pmaddwd     %%mm2, %%mm2     \n\t"
-
-    "  paddd       %%mm0, %%mm7     \n\t"
-    "  paddd       %%mm2, %%mm7     \n\t"
-
-    "  add         %3, %2           \n\t"       /* Inc pointer into src data */
-
-    "  dec         %%edi            \n\t"
-    "  jnz 1b                       \n\t"
-
-    "  movq        %%mm5, %%mm0     \n\t"
-    "  psrlq       $32, %%mm5       \n\t"
-    "  paddw       %%mm0, %%mm5     \n\t"
-    "  movq        %%mm5, %%mm0     \n\t"
-    "  psrlq       $16, %%mm5       \n\t"
-    "  paddw       %%mm0, %%mm5     \n\t"
-    "  movd        %%mm5, %%edi     \n\t"
-    "  movsx       %%di, %%edi      \n\t"
-    "  movl        %%edi, %0        \n\t"
-
-    "  movq        %%mm7, %%mm0     \n\t"
-    "  psrlq       $32, %%mm7       \n\t"
-    "  paddd       %%mm0, %%mm7     \n\t"
-    "  movd        %%mm7, %1        \n\t"
-
-     : "=r" (XSum),
-       "=r" (XXSum),
-       "+r" (DataPtr)
-     : "r" (Stride)
-     : "edi", "memory"
-  );
-
-  /* Compute population variance as mis-match metric. */
-  return (( (XXSum<<6) - XSum*XSum ) );
-}
-
-static ogg_uint32_t inter8x8_err__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
-                                 unsigned char *RefDataPtr, ogg_uint32_t RefStride)
-{
-  ogg_uint32_t  XSum;
-  ogg_uint32_t  XXSum;
-
-  __asm__ __volatile__ (
-    "  .p2align 4                   \n\t"
-
-    "  pxor        %%mm5, %%mm5     \n\t"
-    "  pxor        %%mm6, %%mm6     \n\t"
-    "  pxor        %%mm7, %%mm7     \n\t"
-    "  mov         $8, %%edi        \n\t"
-    "1:                             \n\t"
-    "  movq        (%2), %%mm0      \n\t"       /* take 8 bytes */
-    "  movq        (%3), %%mm1      \n\t"
-    "  movq        %%mm0, %%mm2     \n\t"
-    "  movq        %%mm1, %%mm3     \n\t"
-
-    "  punpcklbw   %%mm6, %%mm0     \n\t"
-    "  punpcklbw   %%mm6, %%mm1     \n\t"
-    "  punpckhbw   %%mm6, %%mm2     \n\t"
-    "  punpckhbw   %%mm6, %%mm3     \n\t"
-
-    "  psubsw      %%mm1, %%mm0     \n\t"
-    "  psubsw      %%mm3, %%mm2     \n\t"
-
-    "  paddw       %%mm0, %%mm5     \n\t"
-    "  paddw       %%mm2, %%mm5     \n\t"
-
-    "  pmaddwd     %%mm0, %%mm0     \n\t"
-    "  pmaddwd     %%mm2, %%mm2     \n\t"
-
-    "  paddd       %%mm0, %%mm7     \n\t"
-    "  paddd       %%mm2, %%mm7     \n\t"
-
-    "  add         %4, %2           \n\t"       /* Inc pointer into src data */
-    "  add         %5, %3           \n\t"       /* Inc pointer into ref data */
-
-    "  dec         %%edi            \n\t"
-    "  jnz 1b                       \n\t"
-
-    "  movq        %%mm5, %%mm0     \n\t"
-    "  psrlq       $32, %%mm5       \n\t"
-    "  paddw       %%mm0, %%mm5     \n\t"
-    "  movq        %%mm5, %%mm0     \n\t"
-    "  psrlq       $16, %%mm5       \n\t"
-    "  paddw       %%mm0, %%mm5     \n\t"
-    "  movd        %%mm5, %%edi     \n\t"
-    "  movsx       %%di, %%edi      \n\t"
-    "  movl        %%edi, %0        \n\t"
-
-    "  movq        %%mm7, %%mm0     \n\t"
-    "  psrlq       $32, %%mm7       \n\t"
-    "  paddd       %%mm0, %%mm7     \n\t"
-    "  movd        %%mm7, %1        \n\t"
-
-     : "=m" (XSum),
-       "=m" (XXSum),
-       "+r" (SrcData),
-       "+r" (RefDataPtr)
-     : "m" (SrcStride),
-       "m" (RefStride)
-     : "edi", "memory"
-  );
-
-  /* Compute and return population variance as mis-match metric. */
-  return (( (XXSum<<6) - XSum*XSum ));
-}
-
-static ogg_uint32_t inter8x8_err_xy2__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
-                                     unsigned char *RefDataPtr1,
-                                     unsigned char *RefDataPtr2, ogg_uint32_t RefStride)
-{
-  ogg_uint32_t XSum;
-  ogg_uint32_t XXSum;
-
-  __asm__ __volatile__ (
-    "  .p2align 4                   \n\t"
-
-    "  pcmpeqd     %%mm4, %%mm4     \n\t"       /* fefefefefefefefe in mm4 */
-    "  paddb       %%mm4, %%mm4     \n\t"
-    "  pxor        %%mm5, %%mm5     \n\t"
-    "  pxor        %%mm6, %%mm6     \n\t"
-    "  pxor        %%mm7, %%mm7     \n\t"
-    "  mov         $8, %%edi        \n\t"
-    "1:                             \n\t"
-    "  movq        (%2), %%mm0      \n\t"       /* take 8 bytes */
-
-    "  movq        (%3), %%mm2      \n\t"
-    "  movq        (%4), %%mm3      \n\t"       /* take average of mm2 and mm3 */
-    "  movq        %%mm2, %%mm1     \n\t"
-    "  pand        %%mm3, %%mm1     \n\t"
-    "  pxor        %%mm2, %%mm3     \n\t"
-    "  pand        %%mm4, %%mm3     \n\t"
-    "  psrlq       $1, %%mm3        \n\t"
-    "  paddb       %%mm3, %%mm1     \n\t"
-
-    "  movq        %%mm0, %%mm2     \n\t"
-    "  movq        %%mm1, %%mm3     \n\t"
-
-    "  punpcklbw   %%mm6, %%mm0     \n\t"
-    "  punpcklbw   %%mm6, %%mm1     \n\t"
-    "  punpckhbw   %%mm6, %%mm2     \n\t"
-    "  punpckhbw   %%mm6, %%mm3     \n\t"
-
-    "  psubsw      %%mm1, %%mm0     \n\t"
-    "  psubsw      %%mm3, %%mm2     \n\t"
-
-    "  paddw       %%mm0, %%mm5     \n\t"
-    "  paddw       %%mm2, %%mm5     \n\t"
-
-    "  pmaddwd     %%mm0, %%mm0     \n\t"
-    "  pmaddwd     %%mm2, %%mm2     \n\t"
-
-    "  paddd       %%mm0, %%mm7     \n\t"
-    "  paddd       %%mm2, %%mm7     \n\t"
-
-    "  add         %5, %2           \n\t"       /* Inc pointer into src data */
-    "  add         %6, %3           \n\t"       /* Inc pointer into ref data */
-    "  add         %6, %4           \n\t"       /* Inc pointer into ref data */
-
-    "  dec         %%edi            \n\t"
-    "  jnz 1b                       \n\t"
-
-    "  movq        %%mm5, %%mm0     \n\t"
-    "  psrlq       $32, %%mm5       \n\t"
-    "  paddw       %%mm0, %%mm5     \n\t"
-    "  movq        %%mm5, %%mm0     \n\t"
-    "  psrlq       $16, %%mm5       \n\t"
-    "  paddw       %%mm0, %%mm5     \n\t"
-    "  movd        %%mm5, %%edi     \n\t"
-    "  movsx       %%di, %%edi      \n\t"
-    "  movl        %%edi, %0        \n\t"
-
-    "  movq        %%mm7, %%mm0     \n\t"
-    "  psrlq       $32, %%mm7       \n\t"
-    "  paddd       %%mm0, %%mm7     \n\t"
-    "  movd        %%mm7, %1        \n\t"
-
-     : "=m" (XSum),
-       "=m" (XXSum),
-       "+r" (SrcData),
-       "+r" (RefDataPtr1),
-       "+r" (RefDataPtr2)
-     : "m" (SrcStride),
-       "m" (RefStride)
-     : "edi", "memory"
-  );
-
-  /* Compute and return population variance as mis-match metric. */
-  return (( (XXSum<<6) - XSum*XSum ));
-}
-
-static void restore_fpu (void)
-{
-  __asm__ __volatile__ (
-    "  emms                         \n\t"
-  );
-}
-
-void dsp_mmx_init(DspFunctions *funcs)
-{
-  funcs->restore_fpu = restore_fpu;
-  funcs->sub8x8 = sub8x8__mmx;
-  funcs->sub8x8_128 = sub8x8_128__mmx;
-  funcs->sub8x8avg2 = sub8x8avg2__mmx;
-  funcs->row_sad8 = row_sad8__mmx;
-  funcs->col_sad8x8 = col_sad8x8__mmx;
-  funcs->sad8x8 = sad8x8__mmx;
-  funcs->sad8x8_thres = sad8x8_thres__mmx;
-  funcs->sad8x8_xy2_thres = sad8x8_xy2_thres__mmx;
-  funcs->intra8x8_err = intra8x8_err__mmx;
-  funcs->inter8x8_err = inter8x8_err__mmx;
-  funcs->inter8x8_err_xy2 = inter8x8_err_xy2__mmx;
-}
-
-#endif /* USE_ASM */

+ 0 - 347
Engine/lib/libtheora/lib/enc/x86_32/dsp_mmxext.c

@@ -1,347 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: dsp_mmxext.c 15153 2008-08-04 18:37:55Z tterribe $
-
- ********************************************************************/
-
-#include <stdlib.h>
-
-#include "../codec_internal.h"
-#include "../dsp.h"
-
-#if defined(USE_ASM)
-
-#define SAD_MMXEXT_LOOP /* one row: mm7 += SAD of the 8 bytes at (%1) and (%2); then %1 += %3 and %2 += %4 */ \
- "  movq (%1), %%mm0             \n\t"  /* take 8 bytes */ \
- "  movq (%2), %%mm1             \n\t"  /* 8 bytes of the other block */ \
- "  psadbw %%mm1, %%mm0          \n\t"  /* mm0 = sum of the 8 absolute byte differences */ \
- "  add %3, %1                   \n\t"  /* Inc pointer into the new data */ \
- "  paddw %%mm0, %%mm7           \n\t"  /* accumulate difference... */ \
- "  add %4, %2                   \n\t"  /* Inc pointer into ref data */
-
-
-static ogg_uint32_t sad8x8__mmxext (unsigned char *ptr1, ogg_uint32_t stride1,
-                            unsigned char *ptr2, ogg_uint32_t stride2)
-{ /* Returns the sum of absolute differences (SAD) between two 8x8 blocks of
-   * bytes.  Eight rows of 8 bytes are read starting at ptr1 and ptr2;
-   * consecutive rows are stride1 and stride2 bytes apart respectively.
-   * NOTE(review): mm0, mm1 and mm7 are written but do not appear in the
-   * clobber list, and no EMMS is issued here. */
-  ogg_uint32_t  DiffVal;
-
-  __asm__ __volatile__ (
-    "  .p2align 4                   \n\t"
-    "  pxor %%mm7, %%mm7            \n\t"       /* mm7 contains the result */
-
-    SAD_MMXEXT_LOOP /* rows 0..6: accumulate, then advance both pointers */
-    SAD_MMXEXT_LOOP
-    SAD_MMXEXT_LOOP
-    SAD_MMXEXT_LOOP
-    SAD_MMXEXT_LOOP
-    SAD_MMXEXT_LOOP
-    SAD_MMXEXT_LOOP
-
-    "  movq (%1), %%mm0             \n\t"       /* take 8 bytes (row 7; pointers are not advanced again) */
-    "  movq (%2), %%mm1             \n\t"
-    "  psadbw %%mm1, %%mm0          \n\t"
-    "  paddw %%mm0, %%mm7           \n\t"       /* accumulate difference... */
-    "  movd %%mm7, %0               \n\t"       /* low 32 bits of mm7 -> DiffVal */
-
-     : "=r" (DiffVal),
-       "+r" (ptr1),
-       "+r" (ptr2)
-     : "r" (stride1),
-       "r" (stride2)
-     : "memory"
-  );
-
-  return DiffVal;
-}
-
-#define SAD_TRES_LOOP /* one row: mm7 += SAD of the 8 bytes at (%1) and (%2); same instructions as SAD_MMXEXT_LOOP */ \
-  "  movq (%1), %%mm0             \n\t" /* take 8 bytes */ \
-  "  movq (%2), %%mm1             \n\t" /* 8 bytes of the other block */ \
-  "  psadbw %%mm1, %%mm0          \n\t" /* mm0 = sum of the 8 absolute byte differences */ \
-  "  add %3, %1                   \n\t" /* Inc pointer into the new data */ \
-  "  paddw %%mm0, %%mm7           \n\t" /* accumulate difference... */ \
-  "  add %4, %2                   \n\t" /* Inc pointer into ref data */
-
-
-static ogg_uint32_t sad8x8_thres__mmxext (unsigned char *ptr1, ogg_uint32_t stride1,
-                                  unsigned char *ptr2, ogg_uint32_t stride2,
-                                  ogg_uint32_t thres)
-{ /* Returns the sum of absolute differences between two 8x8 blocks of bytes
-   * (rows stride1 / stride2 bytes apart in ptr1 / ptr2).
-   * thres is accepted but never referenced: all eight rows are always
-   * summed, there is no early exit.
-   * NOTE(review): mm0, mm1 and mm7 are written but do not appear in the
-   * clobber list. */
-  ogg_uint32_t  DiffVal;
-
-  __asm__ __volatile__ (
-    "  .p2align 4                   \n\t"
-    "  pxor %%mm7, %%mm7            \n\t"       /* mm7 contains the result */
-
-    SAD_TRES_LOOP /* eight rows; the pointers are also advanced after the last one */
-    SAD_TRES_LOOP
-    SAD_TRES_LOOP
-    SAD_TRES_LOOP
-    SAD_TRES_LOOP
-    SAD_TRES_LOOP
-    SAD_TRES_LOOP
-    SAD_TRES_LOOP
-
-    "  movd %%mm7, %0               \n\t"       /* low 32 bits of mm7 -> DiffVal */
-
-     : "=r" (DiffVal),
-       "+r" (ptr1),
-       "+r" (ptr2)
-     : "r" (stride1),
-       "r" (stride2)
-     : "memory"
-  );
-
-  return DiffVal;
-}
-
-#define SAD_XY2_TRES /* one row: mm7 += SAD of the 8 bytes at (%1) against the byte-wise average of (%2) and (%3) */ \
-  "  movq (%1), %%mm0             \n\t" /* take 8 bytes */ \
-  "  movq (%2), %%mm1             \n\t" /* first reference row */ \
-  "  movq (%3), %%mm2             \n\t" /* second reference row */ \
-  "  pavgb %%mm2, %%mm1           \n\t" /* mm1 = per-byte average of the two reference rows */ \
-  "  psadbw %%mm1, %%mm0          \n\t" /* mm0 = sum of the 8 absolute byte differences */ \
- \
-  "  add %4, %1                   \n\t" /* Inc pointer into the new data */ \
-  "  paddw %%mm0, %%mm7           \n\t" /* accumulate difference... */ \
-  "  add %5, %2                   \n\t" /* Inc pointer into ref data */ \
-  "  add %5, %3                   \n\t" /* Inc pointer into ref data */
-
-
-static ogg_uint32_t sad8x8_xy2_thres__mmxext (unsigned char *SrcData, ogg_uint32_t SrcStride,
-                                      unsigned char *RefDataPtr1,
-                                      unsigned char *RefDataPtr2, ogg_uint32_t RefStride,
-                                      ogg_uint32_t thres)
-{ /* Returns the sum of absolute differences between the 8x8 block at SrcData
-   * (rows SrcStride bytes apart) and the byte-wise average of the two 8x8
-   * blocks at RefDataPtr1 and RefDataPtr2 (rows RefStride bytes apart).
-   * thres is accepted but never referenced: all eight rows are always
-   * summed.  The result and both strides are memory operands here.
-   * NOTE(review): mm0, mm1, mm2 and mm7 are written but do not appear in
-   * the clobber list. */
-  ogg_uint32_t  DiffVal;
-
-  __asm__ __volatile__ (
-    "  .p2align 4                   \n\t"
-    "  pxor %%mm7, %%mm7            \n\t"       /* mm7 contains the result */
-    SAD_XY2_TRES /* eight rows */
-    SAD_XY2_TRES
-    SAD_XY2_TRES
-    SAD_XY2_TRES
-    SAD_XY2_TRES
-    SAD_XY2_TRES
-    SAD_XY2_TRES
-    SAD_XY2_TRES
-
-    "  movd %%mm7, %0               \n\t"       /* low 32 bits of mm7 -> DiffVal */
-     : "=m" (DiffVal),
-       "+r" (SrcData),
-       "+r" (RefDataPtr1),
-       "+r" (RefDataPtr2)
-     : "m" (SrcStride),
-       "m" (RefStride)
-     : "memory"
-  );
-
-  return DiffVal;
-}
-
-static ogg_uint32_t row_sad8__mmxext (unsigned char *Src1, unsigned char *Src2)
-{ /* Returns the larger of two sums of absolute differences between the
-   * 8-byte rows at Src1 and Src2: one over bytes 0-3 and one over bytes
-   * 4-7.  The result is masked to its low 16 bits.
-   * NOTE(review): mm0-mm3 are written but do not appear in the clobber
-   * list. */
-  ogg_uint32_t MaxSad;
-
-  __asm__ __volatile__ (
-    "  .p2align 4                   \n\t"
-
-    "  movd        (%1), %%mm0      \n\t"       /* bytes 0-3 of Src1 */
-    "  movd        (%2), %%mm1      \n\t"       /* bytes 0-3 of Src2 */
-    "  psadbw      %%mm0, %%mm1     \n\t"       /* mm1 = SAD over bytes 0-3 */
-    "  movd        4(%1), %%mm2     \n\t"       /* bytes 4-7 of Src1 */
-    "  movd        4(%2), %%mm3     \n\t"       /* bytes 4-7 of Src2 */
-    "  psadbw      %%mm2, %%mm3     \n\t"       /* mm3 = SAD over bytes 4-7 */
-
-    "  pmaxsw      %%mm1, %%mm3     \n\t"       /* keep the larger of the two sums */
-    "  movd        %%mm3, %0        \n\t"
-    "  andl        $0xffff, %0      \n\t"       /* keep only the low 16-bit word */
-
-     : "=m" (MaxSad),
-       "+r" (Src1),
-       "+r" (Src2)
-     :
-     : "memory"
-  );
-
-  return MaxSad;
-}
-
-static ogg_uint32_t col_sad8x8__mmxext (unsigned char *Src1, unsigned char *Src2,
-                                    ogg_uint32_t stride)
-{ /* For two 8x8 blocks of bytes (rows stride bytes apart in both Src1 and
-   * Src2), accumulates per-column sums of |Src1 - Src2| separately over
-   * rows 0-3 and rows 4-7, and returns the largest of those sixteen 16-bit
-   * sums.  edi is used as the row counter and is declared as clobbered.
-   * NOTE(review): mm0-mm7 are written but do not appear in the clobber
-   * list. */
-  ogg_uint32_t MaxSad;
-
-  __asm__ __volatile__ (
-    "  .p2align 4                   \n\t"
-
-    "  pxor        %%mm3, %%mm3     \n\t"       /* zero out mm3 for unpack */
-    "  pxor        %%mm4, %%mm4     \n\t"       /* mm4: sums for columns 0-3, rows 0-3 */
-    "  pxor        %%mm5, %%mm5     \n\t"       /* mm5: sums for columns 4-7, rows 0-3 */
-    "  pxor        %%mm6, %%mm6     \n\t"       /* mm6: sums for columns 0-3, rows 4-7 */
-    "  pxor        %%mm7, %%mm7     \n\t"       /* mm7: sums for columns 4-7, rows 4-7 */
-    "  mov         $4, %%edi        \n\t"       /* 4 rows */
-    "1:                             \n\t"
-    "  movq        (%1), %%mm0      \n\t"       /* take 8 bytes */
-    "  movq        (%2), %%mm1      \n\t"       /* take 8 bytes */
-
-    "  movq        %%mm0, %%mm2     \n\t"
-    "  psubusb     %%mm1, %%mm0     \n\t"       /* A - B */
-    "  psubusb     %%mm2, %%mm1     \n\t"       /* B - A */
-    "  por         %%mm1, %%mm0     \n\t"       /* OR of the two saturating differences gives |A - B| */
-    "  movq        %%mm0, %%mm1     \n\t"
-
-    "  punpcklbw   %%mm3, %%mm0     \n\t"       /* unpack to higher precision for accumulation */
-    "  paddw       %%mm0, %%mm4     \n\t"       /* accumulate difference... */
-    "  punpckhbw   %%mm3, %%mm1     \n\t"       /* unpack high four bytes to higher precision */
-    "  paddw       %%mm1, %%mm5     \n\t"       /* accumulate difference... */
-    "  add         %3, %1           \n\t"       /* Inc pointer into the new data */
-    "  add         %3, %2           \n\t"       /* Inc pointer into the new data */
-
-    "  dec         %%edi            \n\t"
-    "  jnz 1b                       \n\t"
-
-    "  mov         $4, %%edi        \n\t"       /* 4 rows */
-    "2:                             \n\t"
-    "  movq        (%1), %%mm0      \n\t"       /* take 8 bytes */
-    "  movq        (%2), %%mm1      \n\t"       /* take 8 bytes */
-
-    "  movq        %%mm0, %%mm2     \n\t"
-    "  psubusb     %%mm1, %%mm0     \n\t"       /* A - B */
-    "  psubusb     %%mm2, %%mm1     \n\t"       /* B - A */
-    "  por         %%mm1, %%mm0     \n\t"       /* OR of the two saturating differences gives |A - B| */
-    "  movq        %%mm0, %%mm1     \n\t"
-
-    "  punpcklbw   %%mm3, %%mm0     \n\t"       /* unpack to higher precision for accumulation */
-    "  paddw       %%mm0, %%mm6     \n\t"       /* accumulate difference... */
-    "  punpckhbw   %%mm3, %%mm1     \n\t"       /* unpack high four bytes to higher precision */
-    "  paddw       %%mm1, %%mm7     \n\t"       /* accumulate difference... */
-    "  add         %3, %1           \n\t"       /* Inc pointer into the new data */
-    "  add         %3, %2           \n\t"       /* Inc pointer into the new data */
-
-    "  dec         %%edi            \n\t"
-    "  jnz 2b                       \n\t"
-
-    "  pmaxsw      %%mm6, %%mm7     \n\t"       /* word-wise max of the four accumulators ends up in mm7 */
-    "  pmaxsw      %%mm4, %%mm5     \n\t"
-    "  pmaxsw      %%mm5, %%mm7     \n\t"
-    "  movq        %%mm7, %%mm6     \n\t"       /* horizontal max: fold the upper 32 bits onto the lower... */
-    "  psrlq       $32, %%mm6       \n\t"
-    "  pmaxsw      %%mm6, %%mm7     \n\t"
-    "  movq        %%mm7, %%mm6     \n\t"       /* ...then word 1 onto word 0 */
-    "  psrlq       $16, %%mm6       \n\t"
-    "  pmaxsw      %%mm6, %%mm7     \n\t"
-    "  movd        %%mm7, %0        \n\t"
-    "  andl        $0xffff, %0      \n\t"       /* only word 0 holds the overall maximum */
-
-     : "=r" (MaxSad),
-       "+r" (Src1),
-       "+r" (Src2)
-     : "r" (stride)
-     : "memory", "edi"
-  );
-
-  return MaxSad;
-}
-
-static ogg_uint32_t inter8x8_err_xy2__mmxext (unsigned char *SrcData, ogg_uint32_t SrcStride,
-                                     unsigned char *RefDataPtr1,
-                                     unsigned char *RefDataPtr2, ogg_uint32_t RefStride)
-{ /* Let d = src - avg(ref1, ref2) for each of the 64 bytes of an 8x8 block,
-   * where src rows are SrcStride bytes apart and both reference blocks use
-   * RefStride.  Returns 64 * sum(d*d) - sum(d) * sum(d), evaluated in
-   * ogg_uint32_t arithmetic; this is 64*64 times the population variance
-   * of d.  edi is used as the row counter and as a scratch register.
-   * NOTE(review): mm0-mm7 are written but do not appear in the clobber
-   * list. */
-  ogg_uint32_t XSum;
-  ogg_uint32_t XXSum;
-
-  __asm__ __volatile__ (
-    "  .p2align 4                   \n\t"
-
-    "  pxor        %%mm4, %%mm4     \n\t"       /* zero, used to unpack the reference bytes */
-    "  pxor        %%mm5, %%mm5     \n\t"       /* mm5 accumulates d as four 16-bit words */
-    "  pxor        %%mm6, %%mm6     \n\t"       /* zero, used to unpack the source bytes */
-    "  pxor        %%mm7, %%mm7     \n\t"       /* mm7 accumulates d*d as two 32-bit dwords */
-    "  mov         $8, %%edi        \n\t"       /* 8 rows */
-    "1:                             \n\t"
-    "  movq        (%2), %%mm0      \n\t"       /* take 8 bytes */
-
-    "  movq        (%3), %%mm2      \n\t"
-    "  movq        (%4), %%mm1      \n\t"       /* take average of mm2 and mm1 */
-    "  pavgb       %%mm2, %%mm1     \n\t"
-
-    "  movq        %%mm0, %%mm2     \n\t"
-    "  movq        %%mm1, %%mm3     \n\t"
-
-    "  punpcklbw   %%mm6, %%mm0     \n\t"       /* widen bytes to 16-bit words: low halves... */
-    "  punpcklbw   %%mm4, %%mm1     \n\t"
-    "  punpckhbw   %%mm6, %%mm2     \n\t"       /* ...and high halves */
-    "  punpckhbw   %%mm4, %%mm3     \n\t"
-
-    "  psubsw      %%mm1, %%mm0     \n\t"       /* d for columns 0-3 */
-    "  psubsw      %%mm3, %%mm2     \n\t"       /* d for columns 4-7 */
-
-    "  paddw       %%mm0, %%mm5     \n\t"
-    "  paddw       %%mm2, %%mm5     \n\t"
-
-    "  pmaddwd     %%mm0, %%mm0     \n\t"       /* square d and add adjacent pairs into dwords */
-    "  pmaddwd     %%mm2, %%mm2     \n\t"
-
-    "  paddd       %%mm0, %%mm7     \n\t"
-    "  paddd       %%mm2, %%mm7     \n\t"
-
-    "  add         %5, %2           \n\t"       /* Inc pointer into src data */
-    "  add         %6, %3           \n\t"       /* Inc pointer into ref data */
-    "  add         %6, %4           \n\t"       /* Inc pointer into ref data */
-
-    "  dec         %%edi            \n\t"
-    "  jnz 1b                       \n\t"
-
-    "  movq        %%mm5, %%mm0     \n\t"       /* horizontal add of the four words of mm5 into word 0 */
-    "  psrlq       $32, %%mm5       \n\t"
-    "  paddw       %%mm0, %%mm5     \n\t"
-    "  movq        %%mm5, %%mm0     \n\t"
-    "  psrlq       $16, %%mm5       \n\t"
-    "  paddw       %%mm0, %%mm5     \n\t"
-    "  movd        %%mm5, %%edi     \n\t"
-    "  movsx       %%di, %%edi      \n\t"       /* sign-extend the 16-bit sum to 32 bits */
-    "  movl        %%edi, %0        \n\t"       /* XSum = sum(d) */
-
-    "  movq        %%mm7, %%mm0     \n\t"       /* add the two dwords of mm7 */
-    "  psrlq       $32, %%mm7       \n\t"
-    "  paddd       %%mm0, %%mm7     \n\t"
-    "  movd        %%mm7, %1        \n\t"       /* XXSum = sum(d*d) */
-
-     : "=m" (XSum),
-       "=m" (XXSum),
-       "+r" (SrcData),
-       "+r" (RefDataPtr1),
-       "+r" (RefDataPtr2)
-     : "m" (SrcStride),
-       "m" (RefStride)
-     : "edi", "memory"
-  );
-
-  /* Compute and return population variance as mis-match metric. */
-  return (( (XXSum<<6) - XSum*XSum ));
-}
-
-void dsp_mmxext_init(DspFunctions *funcs)
-{ /* Points six entries of *funcs at the *__mmxext routines defined in this
-   * file.  funcs is dereferenced without a NULL check; every other entry
-   * is left unchanged.
-   * NOTE(review): presumably called after dsp_mmx_init so that these
-   * override the MMX versions -- confirm against the caller. */
-  funcs->row_sad8 = row_sad8__mmxext;
-  funcs->col_sad8x8 = col_sad8x8__mmxext;
-  funcs->sad8x8 = sad8x8__mmxext;
-  funcs->sad8x8_thres = sad8x8_thres__mmxext;
-  funcs->sad8x8_xy2_thres = sad8x8_xy2_thres__mmxext;
-  funcs->inter8x8_err_xy2 = inter8x8_err_xy2__mmxext;
-}
-
-#endif /* USE_ASM */

+ 0 - 339
Engine/lib/libtheora/lib/enc/x86_32/fdct_mmx.c

@@ -1,339 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: fdct_mmx.c 15153 2008-08-04 18:37:55Z tterribe $
-
- ********************************************************************/
-
-/* mmx fdct implementation */
-
-#include "theora/theora.h"
-#include "../codec_internal.h"
-#include "../dsp.h"
-
-#if defined(USE_ASM)
-
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC1S7 = 0x0fb15fb15fb15fb15LL; /* 4 x 0xfb15, about 65536*cos(1*pi/16) */
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC2S6 = 0x0ec83ec83ec83ec83LL; /* 4 x 0xec83, about 65536*cos(2*pi/16) */
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC3S5 = 0x0d4dbd4dbd4dbd4dbLL; /* 4 x 0xd4db, about 65536*cos(3*pi/16) */
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC4S4 = 0x0b505b505b505b505LL; /* 4 x 0xb505, about 65536*cos(4*pi/16) */
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC5S3 = 0x08e3a8e3a8e3a8e3aLL; /* 4 x 0x8e3a, about 65536*cos(5*pi/16) */
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC6S2 = 0x061f861f861f861f8LL; /* 4 x 0x61f8, about 65536*cos(6*pi/16) */
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC7S1 = 0x031f131f131f131f1LL; /* 4 x 0x31f1, about 65536*cos(7*pi/16) */
-
-/* execute stage 1 of forward DCT
- *
- * Expands to an inline-asm template that loads the eight operands
- * ip0..ip7 with movq (four 16-bit words each), combines them in mm0-mm7
- * with the named asm operands %[xC1S7]..%[xC7S1], and stores the results
- * back into ip0..ip7.  temp is a scratch operand that holds is07 - is34.
- *
- * Every pmulhw by a constant is followed by a paddw of the input's sign
- * bit (psrlw $15).  For xC1S7..xC5S3 only, the input itself is also added
- * back, which is what the "c * x - x" notes below refer to; for xC6S2 and
- * xC7S1 no such correction is applied.
- */
-#define Fdct_mmx(ip0,ip1,ip2,ip3,ip4,ip5,ip6,ip7,temp)                        \
-  "  movq      " #ip0 ", %%mm0      \n\t"                                     \
-  "  movq      " #ip1 ", %%mm1      \n\t"                                     \
-  "  movq      " #ip3 ", %%mm2      \n\t"                                     \
-  "  movq      " #ip5 ", %%mm3      \n\t"                                     \
-  "  movq        %%mm0, %%mm4       \n\t"                                     \
-  "  movq        %%mm1, %%mm5       \n\t"                                     \
-  "  movq        %%mm2, %%mm6       \n\t"                                     \
-  "  movq        %%mm3, %%mm7       \n\t"                                     \
-                                                                              \
-  "  paddsw    " #ip7 ", %%mm0      \n\t" /* mm0 = ip0 + ip7 = is07 */        \
-  "  paddsw    " #ip2 ", %%mm1      \n\t" /* mm1 = ip1 + ip2 = is12 */        \
-  "  paddsw    " #ip4 ", %%mm2      \n\t" /* mm2 = ip3 + ip4 = is34 */        \
-  "  paddsw    " #ip6 ", %%mm3      \n\t" /* mm3 = ip5 + ip6 = is56 */        \
-  "  psubsw    " #ip7 ", %%mm4      \n\t" /* mm4 = ip0 - ip7 = id07 */        \
-  "  psubsw    " #ip2 ", %%mm5      \n\t" /* mm5 = ip1 - ip2 = id12 */        \
-                                                                              \
-  "  psubsw      %%mm2, %%mm0       \n\t" /* mm0 = is07 - is34 */             \
-                                                                              \
-  "  paddsw      %%mm2, %%mm2       \n\t"                                     \
-                                                                              \
-  "  psubsw    " #ip4 ", %%mm6      \n\t" /* mm6 = ip3 - ip4 = id34 */        \
-                                                                              \
-  "  paddsw      %%mm0, %%mm2       \n\t" /* mm2 = is07 + is34 = is0734 */    \
-  "  psubsw      %%mm3, %%mm1       \n\t" /* mm1 = is12 - is56 */             \
-  "  movq        %%mm0," #temp "    \n\t" /* Save is07 - is34 to free mm0; */ \
-  "  paddsw      %%mm3, %%mm3       \n\t"                                     \
-  "  paddsw      %%mm1, %%mm3       \n\t" /* mm3 = is12 + is56  = is1256 */   \
-                                                                              \
-  "  psubsw    " #ip6 ", %%mm7      \n\t" /* mm7 = ip5 - ip6 = id56 */        \
-  /* ------------------------------------------------------------------- */   \
-  "  psubsw      %%mm7, %%mm5       \n\t" /* mm5 = id12 - id56 */             \
-  "  paddsw      %%mm7, %%mm7       \n\t"                                     \
-  "  paddsw      %%mm5, %%mm7       \n\t" /* mm7 = id12 + id56 */             \
-  /* ------------------------------------------------------------------- */   \
-  "  psubsw      %%mm3, %%mm2       \n\t" /* mm2 = is0734 - is1256 */         \
-  "  paddsw      %%mm3, %%mm3       \n\t"                                     \
-                                                                              \
-  "  movq        %%mm2, %%mm0       \n\t" /* make a copy */                   \
-  "  paddsw      %%mm2, %%mm3       \n\t" /* mm3 = is0734 + is1256 */         \
-                                                                              \
-  "  pmulhw      %[xC4S4], %%mm0    \n\t" /* mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */ \
-  "  paddw       %%mm2, %%mm0       \n\t" /* mm0 = xC4S4 * ( is0734 - is1256 ) */ \
-  "  psrlw       $15, %%mm2         \n\t"                                     \
-  "  paddw       %%mm2, %%mm0       \n\t" /* Truncate mm0, now it is op[4] */ \
-                                                                              \
-  "  movq        %%mm3, %%mm2       \n\t"                                     \
-  "  movq        %%mm0," #ip4 "     \n\t" /* save ip4, now mm0,mm2 are free */ \
-                                                                              \
-  "  movq        %%mm3, %%mm0       \n\t"                                     \
-  "  pmulhw      %[xC4S4], %%mm3    \n\t" /* mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */ \
-                                                                              \
-  "  psrlw       $15, %%mm2         \n\t"                                     \
-  "  paddw       %%mm0, %%mm3       \n\t" /* mm3 = xC4S4 * ( is0734 +is1256 )    */ \
-  "  paddw       %%mm2, %%mm3       \n\t" /* Truncate mm3, now it is op[0] */ \
-                                                                              \
-  "  movq        %%mm3," #ip0 "     \n\t"                                     \
-  /* ------------------------------------------------------------------- */   \
-  "  movq      " #temp ", %%mm3     \n\t" /* mm3 = irot_input_y */            \
-  "  pmulhw      %[xC2S6], %%mm3    \n\t" /* mm3 = xC2S6 * irot_input_y - irot_input_y */ \
-                                                                              \
-  "  movq      " #temp ", %%mm2     \n\t"                                     \
-  "  movq        %%mm2, %%mm0       \n\t"                                     \
-                                                                              \
-  "  psrlw       $15, %%mm2         \n\t" /* mm3 = xC2S6 * irot_input_y */    \
-  "  paddw       %%mm0, %%mm3       \n\t"                                     \
-                                                                              \
-  "  paddw       %%mm2, %%mm3       \n\t" /* Truncated */                     \
-  "  movq        %%mm5, %%mm0       \n\t"                                     \
-                                                                              \
-  "  movq        %%mm5, %%mm2       \n\t"                                     \
-  "  pmulhw      %[xC6S2], %%mm0    \n\t" /* mm0 = xC6S2 * irot_input_x */    \
-                                                                              \
-  "  psrlw       $15, %%mm2         \n\t"                                     \
-  "  paddw       %%mm2, %%mm0       \n\t" /* Truncated */                     \
-                                                                              \
-  "  paddsw      %%mm0, %%mm3       \n\t" /* ip[2] */                         \
-  "  movq        %%mm3," #ip2 "     \n\t" /* Save ip2 */                      \
-                                                                              \
-  "  movq        %%mm5, %%mm0       \n\t"                                     \
-  "  movq        %%mm5, %%mm2       \n\t"                                     \
-                                                                              \
-  "  pmulhw     %[xC2S6], %%mm5     \n\t" /* mm5 = xC2S6 * irot_input_x - irot_input_x */ \
-  "  psrlw       $15, %%mm2         \n\t"                                     \
-                                                                              \
-  "  movq      " #temp ", %%mm3     \n\t"                                     \
-  "  paddw       %%mm0, %%mm5       \n\t" /* mm5 = xC2S6 * irot_input_x */    \
-                                                                              \
-  "  paddw       %%mm2, %%mm5       \n\t" /* Truncated */                     \
-  "  movq        %%mm3, %%mm2       \n\t"                                     \
-                                                                              \
-  "  pmulhw      %[xC6S2], %%mm3    \n\t" /* mm3 = xC6S2 * irot_input_y */    \
-  "  psrlw       $15, %%mm2         \n\t"                                     \
-                                                                              \
-  "  paddw       %%mm2, %%mm3       \n\t" /* Truncated */                     \
-  "  psubsw      %%mm5, %%mm3       \n\t"                                     \
-                                                                              \
-  "  movq        %%mm3," #ip6 "     \n\t"                                     \
-  /* ------------------------------------------------------------------- */   \
-  "  movq        %[xC4S4], %%mm0    \n\t"                                     \
-  "  movq        %%mm1, %%mm2       \n\t"                                     \
-  "  movq        %%mm1, %%mm3       \n\t"                                     \
-                                                                              \
-  "  pmulhw      %%mm0, %%mm1       \n\t" /* mm1 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */ \
-  "  psrlw       $15, %%mm2         \n\t"                                     \
-                                                                              \
-  "  paddw       %%mm3, %%mm1       \n\t" /* mm1 = xC4S4 * ( is12 - is56 ) */ \
-  "  paddw       %%mm2, %%mm1       \n\t" /* Truncate mm1, now it is icommon_product1 */ \
-                                                                              \
-  "  movq        %%mm7, %%mm2       \n\t"                                     \
-  "  movq        %%mm7, %%mm3       \n\t"                                     \
-                                                                              \
-  "  pmulhw      %%mm0, %%mm7       \n\t" /* mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */ \
-  "  psrlw       $15, %%mm2         \n\t"                                     \
-                                                                              \
-  "  paddw       %%mm3, %%mm7       \n\t" /* mm7 = xC4S4 * ( id12 + id56 ) */ \
-  "  paddw       %%mm2, %%mm7       \n\t" /* Truncate mm7, now it is icommon_product2 */ \
-  /* ------------------------------------------------------------------- */   \
-  "  pxor        %%mm0, %%mm0       \n\t" /* Clear mm0 */                     \
-  "  psubsw      %%mm6, %%mm0       \n\t" /* mm0 = - id34 */                  \
-                                                                              \
-  "  psubsw      %%mm7, %%mm0       \n\t" /* mm0 = - ( id34 + icommon_product2 ) */ \
-  "  paddsw      %%mm6, %%mm6       \n\t"                                     \
-  "  paddsw      %%mm0, %%mm6       \n\t" /* mm6 = id34 - icommon_product2 */ \
-                                                                              \
-  "  psubsw      %%mm1, %%mm4       \n\t" /* mm4 = id07 - icommon_product1 */ \
-  "  paddsw      %%mm1, %%mm1       \n\t"                                     \
-  "  paddsw      %%mm4, %%mm1       \n\t" /* mm1 = id07 + icommon_product1 */ \
-  /* ------------------------------------------------------------------- */   \
-  "  movq        %[xC1S7], %%mm7    \n\t"                                     \
-  "  movq        %%mm1, %%mm2       \n\t"                                     \
-                                                                              \
-  "  movq        %%mm1, %%mm3       \n\t"                                     \
-  "  pmulhw      %%mm7, %%mm1       \n\t" /* mm1 = xC1S7 * irot_input_x - irot_input_x */ \
-                                                                              \
-  "  movq        %[xC7S1], %%mm7    \n\t"                                     \
-  "  psrlw       $15, %%mm2         \n\t"                                     \
-                                                                              \
-  "  paddw       %%mm3, %%mm1       \n\t" /* mm1 = xC1S7 * irot_input_x */    \
-  "  paddw       %%mm2, %%mm1       \n\t" /* Truncated */                     \
-                                                                              \
-  "  pmulhw      %%mm7, %%mm3       \n\t" /* mm3 = xC7S1 * irot_input_x */    \
-  "  paddw       %%mm2, %%mm3       \n\t" /* Truncated */                     \
-                                                                              \
-  "  movq        %%mm0, %%mm5       \n\t"                                     \
-  "  movq        %%mm0, %%mm2       \n\t"                                     \
-                                                                              \
-  "  movq        %[xC1S7], %%mm7    \n\t"                                     \
-  "  pmulhw      %%mm7, %%mm0       \n\t" /* mm0 = xC1S7 * irot_input_y - irot_input_y */ \
-                                                                              \
-  "  movq        %[xC7S1], %%mm7    \n\t"                                     \
-  "  psrlw       $15, %%mm2         \n\t"                                     \
-                                                                              \
-  "  paddw       %%mm5, %%mm0       \n\t" /* mm0 = xC1S7 * irot_input_y */    \
-  "  paddw       %%mm2, %%mm0       \n\t" /* Truncated */                     \
-                                                                              \
-  "  pmulhw      %%mm7, %%mm5       \n\t" /* mm5 = xC7S1 * irot_input_y */    \
-  "  paddw       %%mm2, %%mm5       \n\t" /* Truncated */                     \
-                                                                              \
-  "  psubsw      %%mm5, %%mm1       \n\t" /* mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = ip1 */ \
-  "  paddsw      %%mm0, %%mm3       \n\t" /* mm3 = xC7S1 * irot_input_x + xC1S7 * irot_input_y = ip7 */ \
-                                                                              \
-  "  movq        %%mm1," #ip1 "     \n\t"                                     \
-  "  movq        %%mm3," #ip7 "     \n\t"                                     \
-  /* ------------------------------------------------------------------- */   \
-  "  movq        %[xC3S5], %%mm0    \n\t"                                     \
-  "  movq        %[xC5S3], %%mm1    \n\t"                                     \
-                                                                              \
-  "  movq        %%mm6, %%mm5       \n\t"                                     \
-  "  movq        %%mm6, %%mm7       \n\t"                                     \
-                                                                              \
-  "  movq        %%mm4, %%mm2       \n\t"                                     \
-  "  movq        %%mm4, %%mm3       \n\t"                                     \
-                                                                              \
-  "  pmulhw      %%mm0, %%mm4       \n\t" /* mm4 = xC3S5 * irot_input_x - irot_input_x */ \
-  "  pmulhw      %%mm1, %%mm6       \n\t" /* mm6 = xC5S3 * irot_input_y - irot_input_y */ \
-                                                                              \
-  "  psrlw       $15, %%mm2         \n\t"                                     \
-  "  psrlw       $15, %%mm5         \n\t"                                     \
-                                                                              \
-  "  paddw       %%mm3, %%mm4       \n\t" /* mm4 = xC3S5 * irot_input_x */    \
-  "  paddw       %%mm7, %%mm6       \n\t" /* mm6 = xC5S3 * irot_input_y */    \
-                                                                              \
-  "  paddw       %%mm2, %%mm4       \n\t" /* Truncated */                     \
-  "  paddw       %%mm5, %%mm6       \n\t" /* Truncated */                     \
-                                                                              \
-  "  psubsw      %%mm6, %%mm4       \n\t" /* ip3 */                           \
-  "  movq        %%mm4," #ip3 "     \n\t"                                     \
-                                                                              \
-  "  movq        %%mm3, %%mm4       \n\t"                                     \
-  "  movq        %%mm7, %%mm6       \n\t"                                     \
-                                                                              \
-  "  pmulhw      %%mm1, %%mm3       \n\t" /* mm3 = xC5S3 * irot_input_x - irot_input_x */ \
-  "  pmulhw      %%mm0, %%mm7       \n\t" /* mm7 = xC3S5 * irot_input_y - irot_input_y */ \
-                                                                              \
-  "  paddw       %%mm2, %%mm4       \n\t"                                     \
-  "  paddw       %%mm5, %%mm6       \n\t"                                     \
-                                                                              \
-  "  paddw       %%mm4, %%mm3       \n\t" /* mm3 = xC5S3 * irot_input_x */    \
-  "  paddw       %%mm6, %%mm7       \n\t" /* mm7 = xC3S5 * irot_input_y */    \
-                                                                              \
-  "  paddw       %%mm7, %%mm3       \n\t" /* ip5 */                           \
-  "  movq        %%mm3," #ip5 "     \n\t"
-
-#define Transpose_mmx(ip0,ip1,ip2,ip3,ip4,ip5,ip6,ip7,                  \
-                      op0,op1,op2,op3,op4,op5,op6,op7)                  \
-  "  movq      " #ip0 ", %%mm0      \n\t" /* mm0 = a3 a2 a1 a0 */       \
-  "  movq      " #ip4 ", %%mm4      \n\t" /* mm4 = e3 e2 e1 e0 */       \
-  "  movq      " #ip1 ", %%mm1      \n\t" /* mm1 = b3 b2 b1 b0 */       \
-  "  movq      " #ip5 ", %%mm5      \n\t" /* mm5 = f3 f2 f1 f0 */       \
-  "  movq      " #ip2 ", %%mm2      \n\t" /* mm2 = c3 c2 c1 c0 */       \
-  "  movq      " #ip6 ", %%mm6      \n\t" /* mm6 = g3 g2 g1 g0 */       \
-  "  movq      " #ip3 ", %%mm3      \n\t" /* mm3 = d3 d2 d1 d0 */       \
-  "  movq        %%mm1," #op1 "     \n\t" /* save  b3 b2 b1 b0 */       \
-  "  movq      " #ip7 ", %%mm7      \n\t" /* mm7 = h3 h2 h1 h0 */       \
-   /* Transposes two 4x4 blocks of 16-bit words: rows e..h (ip4-ip7)    \
-      are written to op4-op7 first, then rows a..d (ip0-ip3) to         \
-      op0-op3.  op0 and op1 double as spill slots for a and b, and op1  \
-      is stored before ip7 is loaded, so op1 must not alias ip7. */     \
-  "  movq        %%mm4, %%mm1       \n\t" /* mm1 = e3 e2 e1 e0 */       \
-  "  punpcklwd   %%mm5, %%mm4       \n\t" /* mm4 = f1 e1 f0 e0 */       \
-  "  movq        %%mm0," #op0 "     \n\t" /* save a3 a2 a1 a0  */       \
-  "  punpckhwd   %%mm5, %%mm1       \n\t" /* mm1 = f3 e3 f2 e2 */       \
-  "  movq        %%mm6, %%mm0       \n\t" /* mm0 = g3 g2 g1 g0 */       \
-  "  punpcklwd   %%mm7, %%mm6       \n\t" /* mm6 = h1 g1 h0 g0 */       \
-  "  movq        %%mm4, %%mm5       \n\t" /* mm5 = f1 e1 f0 e0 */       \
-  "  punpckldq   %%mm6, %%mm4       \n\t" /* mm4 = h0 g0 f0 e0 = MM4 */ \
-  "  punpckhdq   %%mm6, %%mm5       \n\t" /* mm5 = h1 g1 f1 e1 = MM5 */ \
-  "  movq        %%mm1, %%mm6       \n\t" /* mm6 = f3 e3 f2 e2 */       \
-  "  movq        %%mm4," #op4 "     \n\t"                               \
-  "  punpckhwd   %%mm7, %%mm0       \n\t" /* mm0 = h3 g3 h2 g2 */       \
-  "  movq        %%mm5," #op5 "     \n\t"                               \
-  "  punpckhdq   %%mm0, %%mm6       \n\t" /* mm6 = h3 g3 f3 e3 = MM7 */ \
-  "  movq      " #op0 ", %%mm4      \n\t" /* mm4 = a3 a2 a1 a0 */       \
-  "  punpckldq   %%mm0, %%mm1       \n\t" /* mm1 = h2 g2 f2 e2 = MM6 */ \
-  "  movq      " #op1 ", %%mm5      \n\t" /* mm5 = b3 b2 b1 b0 */       \
-  "  movq        %%mm4, %%mm0       \n\t" /* mm0 = a3 a2 a1 a0 */       \
-  "  movq        %%mm6," #op7 "     \n\t"                               \
-  "  punpcklwd   %%mm5, %%mm0       \n\t" /* mm0 = b1 a1 b0 a0 */       \
-  "  movq        %%mm1," #op6 "     \n\t"                               \
-  "  punpckhwd   %%mm5, %%mm4       \n\t" /* mm4 = b3 a3 b2 a2 */       \
-  "  movq        %%mm2, %%mm5       \n\t" /* mm5 = c3 c2 c1 c0 */       \
-  "  punpcklwd   %%mm3, %%mm2       \n\t" /* mm2 = d1 c1 d0 c0 */       \
-  "  movq        %%mm0, %%mm1       \n\t" /* mm1 = b1 a1 b0 a0 */       \
-  "  punpckldq   %%mm2, %%mm0       \n\t" /* mm0 = d0 c0 b0 a0 = MM0 */ \
-  "  punpckhdq   %%mm2, %%mm1       \n\t" /* mm1 = d1 c1 b1 a1 = MM1 */ \
-  "  movq        %%mm4, %%mm2       \n\t" /* mm2 = b3 a3 b2 a2 */       \
-  "  movq        %%mm0," #op0 "     \n\t"                               \
-  "  punpckhwd   %%mm3, %%mm5       \n\t" /* mm5 = d3 c3 d2 c2 */       \
-  "  movq        %%mm1," #op1 "     \n\t"                               \
-  "  punpckhdq   %%mm5, %%mm4       \n\t" /* mm4 = d3 c3 b3 a3 = MM3 */ \
-  "  punpckldq   %%mm5, %%mm2       \n\t" /* mm2 = d2 c2 b2 a2 = MM2 */ \
-  "  movq        %%mm4," #op3 "     \n\t"                               \
-  "  movq        %%mm2," #op2 "     \n\t"
-
-
-/* This performs a 2D Forward DCT on an 8x8 block with short
-   coefficients. We try to do the truncation to match the C
-   version.
-
-   InputData  - source block; the asm reads byte offsets 0..127 from it.
-   OutputData - destination block; byte offsets 0..127 are written, and
-                it is also reused as the working buffer for the in-place
-                second half of the transform.
-   A local 8-byte-aligned temp[8*8] array is passed to Fdct_mmx as its
-   last operand (presumably scratch space; confirm against Fdct_mmx).
-   The asm ends with emms, so the MMX state is cleared before returning. */
-static void fdct_short__mmx ( ogg_int16_t *InputData, ogg_int16_t *OutputData)
-{
-  ogg_int16_t __attribute__((aligned(8))) temp[8*8];
-
-  __asm__ __volatile__ (
-    "  .p2align 4                   \n\t"
-    /*
-     * Input data is an 8x8 block of 16-bit values (16 bytes per row).  To
-     * make processing of the data more efficient, each pass transposes a
-     * pair of 4x4 sub-blocks and then runs Fdct_mmx over the resulting
-     * eight 4-word vectors:
-     *   pass 1: rows 0-3 of InputData (%0), written to OutputData (%1)
-     *   pass 2: rows 4-7 of InputData (%0), written to OutputData (%1)
-     *   pass 3: left  4 columns of all 8 rows of OutputData, in place
-     *   pass 4: right 4 columns of all 8 rows of OutputData, in place
-     * %2 is the local temp buffer handed to Fdct_mmx.
-     */
-    Transpose_mmx (  (%0), 16(%0), 32(%0), 48(%0),  8(%0), 24(%0), 40(%0), 56(%0),
-                     (%1), 16(%1), 32(%1), 48(%1),  8(%1), 24(%1), 40(%1), 56(%1))
-    Fdct_mmx      (  (%1), 16(%1), 32(%1), 48(%1),  8(%1), 24(%1), 40(%1), 56(%1), (%2))
-
-    Transpose_mmx (64(%0), 80(%0), 96(%0),112(%0), 72(%0), 88(%0),104(%0),120(%0),
-                   64(%1), 80(%1), 96(%1),112(%1), 72(%1), 88(%1),104(%1),120(%1))
-    Fdct_mmx      (64(%1), 80(%1), 96(%1),112(%1), 72(%1), 88(%1),104(%1),120(%1), (%2))
-
-    Transpose_mmx ( 0(%1), 16(%1), 32(%1), 48(%1), 64(%1), 80(%1), 96(%1),112(%1),
-                    0(%1), 16(%1), 32(%1), 48(%1), 64(%1), 80(%1), 96(%1),112(%1))
-    Fdct_mmx      ( 0(%1), 16(%1), 32(%1), 48(%1), 64(%1), 80(%1), 96(%1),112(%1), (%2))
-
-    Transpose_mmx ( 8(%1), 24(%1), 40(%1), 56(%1), 72(%1), 88(%1),104(%1),120(%1),
-                    8(%1), 24(%1), 40(%1), 56(%1), 72(%1), 88(%1),104(%1),120(%1))
-    Fdct_mmx      ( 8(%1), 24(%1), 40(%1), 56(%1), 72(%1), 88(%1),104(%1),120(%1), (%2))
-
-    "  emms                         \n\t"
-
-    : "+r" (InputData),         /* %0 */
-      "+r" (OutputData)         /* %1 */
-    : "r" (temp),               /* %2 */
-      [xC1S7] "m" (xC1S7),      /* gcc 3.1+ allows named asm parameters */
-      [xC2S6] "m" (xC2S6),
-      [xC3S5] "m" (xC3S5),
-      [xC4S4] "m" (xC4S4),
-      [xC5S3] "m" (xC5S3),
-      [xC6S2] "m" (xC6S2),
-      [xC7S1] "m" (xC7S1)
-    : "memory"
-  );
-}
-
-/* Install our implementation in the function table: sets
-   funcs->fdct_short to fdct_short__mmx.  No other member of *funcs
-   is touched here. */
-void dsp_mmx_fdct_init(DspFunctions *funcs)
-{
-  funcs->fdct_short = fdct_short__mmx;
-}
-
-#endif /* USE_ASM */

+ 0 - 1452
Engine/lib/libtheora/lib/enc/x86_32/idct_mmx.c

@@ -1,1452 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: idct_mmx.c 15153 2008-08-04 18:37:55Z tterribe $
-
- ********************************************************************/
-
-#include "../codec_internal.h"
-
-#if defined(USE_ASM)
-
-#define ASM asm
-
-/****************************************************************************
-*
-*   Description  :     IDCT with multiple versions based on # of non 0 coeffs
-*
-*****************************************************************************
-*/
-
-// Dequantization + inverse discrete cosine transform.
-
-// Constants used in MMX implementation of dequantization and idct.
-// All the MMX stuff works with 4 16-bit quantities at a time and
-// we create 11 constants of size 4 x 16 bits.
-// The first 4 are used to mask the individual 16-bit words within a group
-// and are used in the address-shuffling part of the dequantization.
-// The last 7 are fixed-point approximations to the cosines of angles
-// occurring in the DCT; each of these contains 4 copies of the same value.
-
-// There is only one (statically initialized) instance of this object
-// wrapped in an allocator object that forces its starting address
-// to be evenly divisible by 32.  Hence the actual object occupies 2.75
-// cache lines on a Pentium processor.
-
-// Offsets in bytes used by the assembler code below
-// must of course agree with the idctConstants constructor.
-
-#define MaskOffset 0        // 4 masks come in order low word to high
-#define CosineOffset 32     // 7 cosines come in order pi/16 * (1 ... 7)
-#define EightOffset 88      // 4 words holding 8; follows the cosines (32 + 7*8 bytes)
-#define IdctAdjustBeforeShift 8  // the value 8 stored at EightOffset (see the commented-out fill routine)
-
-/*
-UINT16 idctcosTbl[ 7] =
-{
-    64277, 60547, 54491, 46341, 36410, 25080, 12785
-};
-
-void fillidctconstants(void)
-{
-    int j = 16;
-    UINT16 * p;
-    do
-    {
-        idctconstants[ --j] = 0;
-    }
-    while( j);
-
-    idctconstants[0] = idctconstants[5] = idctconstants[10] = idctconstants[15] = 65535;
-
-    j = 1;
-    do
-    {
-        p = idctconstants + ( (j+3) << 2);
-        p[0] = p[1] = p[2] = p[3] = idctcosTbl[ j - 1];
-    }
-    while( ++j <= 7);
-
-    idctconstants[44] = idctconstants[45] = idctconstants[46] = idctconstants[47] = IdctAdjustBeforeShift;
-}
-*/
-
-ogg_uint16_t idctconstants[(4+7+1) * 4] = {  /* 4 masks, 7 cosines, 1 rounding constant; 4 words each */
-    65535,     0,     0,     0,     0, 65535,     0,     0,  /* byte  0: mask word 0, mask word 1 */
-        0,     0, 65535,     0,     0,     0,     0, 65535,  /* byte 16: mask word 2, mask word 3 */
-    64277, 64277, 64277, 64277, 60547, 60547, 60547, 60547,  /* byte 32: cos(1*pi/16), cos(2*pi/16), scaled by ~65536 */
-    54491, 54491, 54491, 54491, 46341, 46341, 46341, 46341,  /* byte 48: cos(3*pi/16), cos(4*pi/16) */
-    36410, 36410, 36410, 36410, 25080, 25080, 25080, 25080,  /* byte 64: cos(5*pi/16), cos(6*pi/16) */
-    12785, 12785, 12785, 12785,     8,     8,     8,     8,  /* byte 80: cos(7*pi/16); byte 88: the constant 8 */
-};
-
-/* Dequantization + inverse DCT.
-
-   Dequantization multiplies user's 16-bit signed indices (range -512 to +511)
-   by unsigned 16-bit quantization table entries.
-   These table entries are upscaled by 4, max is 30 * 128 * 4 < 2^14.
-   Result is scaled signed DCT coefficients (abs value < 2^15).
-
-   In the data stream, the coefficients are sent in order of increasing
-   total (horizontal + vertical) frequency.  The exact picture is as follows:
-
-    00 01 05 06  16 17 33 34
-    02 04 07 15  20 32 35 52
-    03 10 14 21  31 36 51 53
-    11 13 22 30  37 50 54 65
-
-    12 23 27 40  47 55 64 66
-    24 26 41 46  56 63 67 74
-    25 42 45 57  62 70 73 75
-    43 44 60 61  71 72 76 77
-
-   Here the position in the matrix corresponds to the (horiz,vert)
-   frequency indices and the octal entry in the matrix is the position
-   of the coefficient in the data stream.  Thus the coefficients are sent
-   in sort of a diagonal "snake".
-
-   The dequantization stage "uncurls the snake" and stores the expanded
-   coefficients in more convenient positions.  These are not exactly the
-   natural positions given above but take into account our implementation
-   of the idct, which basically requires two one-dimensional idcts and
-   two transposes.
-
-   We fold the first transpose into the storage of the expanded coefficients.
-   We don't actually do a full transpose because this would require doubling
-   the size of the idct buffer; rather, we just transpose each of the 4x4
-   subblocks.  Using slightly varying addressing schemes in each of the
-   four 4x8 idcts then allows these transforms to be done in place.
-
-   Transposing the 4x4 subblocks in the matrix above gives
-
-    00 02 03 11  16 20 31 37
-    01 04 10 13  17 32 36 50
-    05 07 14 22  33 35 51 54
-    06 15 21 30  34 52 53 65
-
-    12 24 25 43  47 56 62 71
-    23 26 42 44  55 63 70 72
-    27 41 45 60  64 67 73 76
-    40 46 57 61  66 74 75 77
-
-   Finally, we reverse the words in each 4 word group to clarify
-   direction of shifts.
-
-    11 03 02 00  37 31 20 16
-    13 10 04 01  50 36 32 17
-    22 14 07 05  54 51 35 33
-    30 21 15 06  65 53 52 34
-
-    43 25 24 12  71 62 56 47
-    44 42 26 23  72 70 63 55
-    60 45 41 27  76 73 67 64
-    61 57 46 40  77 75 74 66
-
-   This matrix then shows the 16 4x16 destination words in terms of
-   the 16 4x16 input words.
-
-   We implement this algorithm by manipulation of mmx registers,
-   which seems to be the fastest way to proceed.  It is completely
-   hand-written; there does not seem to be enough recurrence to
-   reasonably compartmentalize any of it.  Hence the resulting
-   program is ugly and bloated.  Furthermore, due to the absence of
-   register pressure, it is boring and artless.  I hate it.
-
-   The idct itself is more interesting.  Since the two-dimensional dct
-   basis functions are products of the one-dimensional dct basis functions,
-   we can compute an inverse (or forward) dct via two 1-D transforms,
-   on rows then on columns.  To exploit MMX parallelism, we actually do
-   both operations on columns, interposing a (partial) transpose between
-   the two 1-D transforms, the first transpose being done by the expansion
-   described above.
-
-   The 8-sample one-dimensional DCT is a standard orthogonal expansion using
-   the (unnormalized) basis functions
-
-    b[k]( i) = cos( pi * k * (2i + 1) / 16);
-
-   here k = 0 ... 7 is the frequency and i = 0 ... 7 is the spatial coordinate.
-   To normalize, b[0] should be multiplied by 1/sqrt( 8) and the other b[k]
-   should be multiplied by 1/2.
-
-   The 8x8 two-dimensional DCT is just the product of one-dimensional DCTs
-   in each direction.  The (unnormalized) basis functions are
-
-    B[k,l]( i, j) = b[k]( i) * b[l]( j);
-
-   this time k and l are the horizontal and vertical frequencies,
-   i and j are the horizontal and vertical spatial coordinates;
-   all indices vary from 0 ... 7 (as above)
-   and there are now 4 cases of normalization.
-
-   Our 1-D idct expansion uses constants C1 ... C7 given by
-
-    (*)  Ck = C(-k) = cos( pi * k/16) = S(8-k) = -S(k-8) = sin( pi * (8-k)/16)
-
-   and the following 1-D algorithm transforming I0 ... I7  to  R0 ... R7 :
-
-   A = (C1 * I1) + (C7 * I7)        B = (C7 * I1) - (C1 * I7)
-   C = (C3 * I3) + (C5 * I5)        D = (C3 * I5) - (C5 * I3)
-   A. = C4 * (A - C)                B. = C4 * (B - D)
-   C. = A + C                       D. = B + D
-
-   E = C4 * (I0 + I4)               F = C4 * (I0 - I4)
-   G = (C2 * I2) + (C6 * I6)        H = (C6 * I2) - (C2 * I6)
-   E. = E - G
-   G. = E + G
-
-   A.. = F + A.                 B.. = B. - H
-   F.  = F - A.                 H.  = B. + H
-
-   R0 = G. + C. R1 = A.. + H.   R3 = E. + D.    R5 = F. + B..
-   R7 = G. - C. R2 = A.. - H.   R4 = E. - D.    R6 = F. - B..
-
-   It is due to Vetterli and Ligtenberg and may be found in the JPEG
-   reference book by Pennebaker and Mitchell.
-
-   Correctness of the algorithm follows from (*) together with the
-   addition formulas for sine and cosine:
-
-    cos( A + B) = cos( A) * cos( B)  -  sin( A) * sin( B)
-    sin( A + B) = sin( A) * cos( B)  +  cos( A) * sin( B)
-
-   Note that this implementation absorbs the difference in normalization
-   between the 0th and higher frequencies, although the results produced
-   are actually twice as big as they should be.  Since we do this for each
-   dimension, the 2-D idct results are 4x the desired results.  Finally,
-   taking into account that the dequantization multiplies by 4 as well,
-   our actual results are 16x too big.  We fix this by shifting the final
-   results right by 4 bits.
-
-   High precision version approximates C1 ... C7 to 16 bits.
-   Since MMX only provides a signed multiply, C1 ... C5 appear to be
-   negative and multiplies involving them must be adjusted to compensate
-   for this.  C6 and C7 do not require this adjustment since
-   they are < 1/2 and are correctly treated as positive numbers.
-
-   Following macro does four 8-sample one-dimensional idcts in parallel.
-   This is actually not such a difficult program to write once you
-   make a couple of observations (I of course was unable to make these
-   observations until I'd half-written a couple of other versions).
-
-    1. Everything is easy once you are done with the multiplies.
-       This is because, given X and Y in registers, one may easily
-       calculate X+Y and X-Y using just those 2 registers.
-
-    2. You always need at least 2 extra registers to calculate products,
-       so storing 2 temporaries is inevitable.  C. and D. seem to be
-       the best candidates.
-
-    3. The products should be calculated in decreasing order of complexity
-       (which translates into register pressure).  Since C1 ... C5 require
-       adjustment (and C6, C7 do not), we begin by calculating C and D.
-*/
-
-/**************************************************************************************
- *
- *      Routine:        BeginIDCT
- *
- *      Description:    String-literal macro holding the shared first part of
- *                      the 1-D IDCT.  It works on four 16-bit values per MMX
- *                      register, i.e. four 8-sample IDCTs in parallel, and is
- *                      pasted into both RowIDCT and ColumnIDCT, which finish
- *                      the computation.
- *
- *      Input:          No macro arguments.  The expansion site must define
- *                      I(n), J(n), C(n) and r0..r7 as assembler operand
- *                      strings.  Reads I(0)-I(3), J(4)-J(7) and C(1)-C(7).
- *
- *      Output:         No value.  I(1) and I(2) are overwritten with
- *                      intermediates.  Going by the comments in RowIDCT and
- *                      ColumnIDCT, on exit r0 = C., r1 = H., r2 = A.. - H.,
- *                      r4 = E, r5 = B.., r6 = F., r7 = G and I(2) = D.
- *
- *      Return:         None
- *
- *      Special Note:   Instruction order is hand-scheduled; do not reorder.
- *
- *      Error:          None
- *
- ***************************************************************************************
- */
-
-#define MtoSTR(s) #s    /* stringizes s; used to build assembler operand text */
-
-#define Dump    "call MMX_dump\n"   /* debug aid; NOTE(review): MMX_dump appears only as commented-out code in this part of the file */
-
-#define BeginIDCT "#BeginIDCT\n"    \
-                                    \
-    "   movq    "   I(3)","r2"\n"   \
-                                    \
-    "   movq    "   C(3)","r6"\n"   \
-    "   movq    "   r2","r4"\n"     \
-    "   movq    "   J(5)","r7"\n"   \
-    "   pmulhw  "   r6","r4"\n"     \
-    "   movq    "   C(5)","r1"\n"   \
-    "   pmulhw  "   r7","r6"\n"     \
-    "   movq    "   r1","r5"\n"     \
-    "   pmulhw  "   r2","r1"\n"     \
-    "   movq    "   I(1)","r3"\n"   \
-    "   pmulhw  "   r7","r5"\n"     \
-    "   movq    "   C(1)","r0"\n"   \
-    "   paddw   "   r2","r4"\n"     \
-    "   paddw   "   r7","r6"\n"     \
-    "   paddw   "   r1","r2"\n"     \
-    "   movq    "   J(7)","r1"\n"   \
-    "   paddw   "   r5","r7"\n"     \
-    "   movq    "   r0","r5"\n"     \
-    "   pmulhw  "   r3","r0"\n"     \
-    "   paddsw  "   r7","r4"\n"     \
-    "   pmulhw  "   r1","r5"\n"     \
-    "   movq    "   C(7)","r7"\n"   \
-    "   psubsw  "   r2","r6"\n"     \
-    "   paddw   "   r3","r0"\n"     \
-    "   pmulhw  "   r7","r3"\n"     \
-    "   movq    "   I(2)","r2"\n"   \
-    "   pmulhw  "   r1","r7"\n"     \
-    "   paddw   "   r1","r5"\n"     \
-    "   movq    "   r2","r1"\n"     \
-    "   pmulhw  "   C(2)","r2"\n"   \
-    "   psubsw  "   r5","r3"\n"     \
-    "   movq    "   J(6)","r5"\n"   \
-    "   paddsw  "   r7","r0"\n"     \
-    "   movq    "   r5","r7"\n"     \
-    "   psubsw  "   r4","r0"\n"     \
-    "   pmulhw  "   C(2)","r5"\n"   \
-    "   paddw   "   r1","r2"\n"     \
-    "   pmulhw  "   C(6)","r1"\n"   \
-    "   paddsw  "   r4","r4"\n"     \
-    "   paddsw  "   r0","r4"\n"     \
-    "   psubsw  "   r6","r3"\n"     \
-    "   paddw   "   r7","r5"\n"     \
-    "   paddsw  "   r6","r6"\n"     \
-    "   pmulhw  "   C(6)","r7"\n"   \
-    "   paddsw  "   r3","r6"\n"     \
-    "   movq    "   r4","I(1)"\n"   \
-    "   psubsw  "   r5","r1"\n"     \
-    "   movq    "   C(4)","r4"\n"   \
-    "   movq    "   r3","r5"\n"     \
-    "   pmulhw  "   r4","r3"\n"     \
-    "   paddsw  "   r2","r7"\n"     \
-    "   movq    "   r6","I(2)"\n"   \
-    "   movq    "   r0","r2"\n"     \
-    "   movq    "   I(0)","r6"\n"   \
-    "   pmulhw  "   r4","r0"\n"     \
-    "   paddw   "   r3","r5"\n"     \
-    "\n"                            \
-    "   movq    "   J(4)","r3"\n"   \
-    "   psubsw  "   r1","r5"\n"     \
-    "   paddw   "   r0","r2"\n"     \
-    "   psubsw  "   r3","r6"\n"     \
-    "   movq    "   r6","r0"\n"     \
-    "   pmulhw  "   r4","r6"\n"     \
-    "   paddsw  "   r3","r3"\n"     \
-    "   paddsw  "   r1","r1"\n"     \
-    "   paddsw  "   r0","r3"\n"     \
-    "   paddsw  "   r5","r1"\n"     \
-    "   pmulhw  "   r3","r4"\n"     \
-    "   paddsw  "   r0","r6"\n"     \
-    "   psubsw  "   r2","r6"\n"     \
-    "   paddsw  "   r2","r2"\n"     \
-    "   movq    "   I(1)","r0"\n"   \
-    "   paddsw  "   r6","r2"\n"     \
-    "   paddw   "   r3","r4"\n"     \
-    "   psubsw  "   r1","r2"\n"     \
-    "#end BeginIDCT\n"
-// end BeginIDCT macro (38 cycles).
-
-
-// Two versions of the end of the idct depending on whether we're feeding
-// into a transpose or dividing the final results by 16 and storing them.
-
-/**************************************************************************************
- *
- *      Routine:        RowIDCT
- *
- *      Description:    Statement macro: a complete ASM(...) block that runs
- *                      BeginIDCT and then finishes the 1-D IDCT on four
- *                      lanes at once, without scaling the results.
- *
- *      Input:          No macro arguments.  Needs I(n), J(n), C(n) and
- *                      r0..r7 defined at the expansion site (see BeginIDCT).
- *
- *      Output:         R0 and R2..R7 are left in r0 and r2..r7; R1 is stored
- *                      to I(1).  This is the entry layout the Transpose
- *                      macro documents.
- *
- *      Return:         None
- *
- *      Special Note:   The expansion already ends with ';'.
- *
- *      Error:          None
- *
- ***************************************************************************************
- */
-
-// RowIDCT gets ready to transpose.
-
-#define RowIDCT ASM("\n"\
-    "#RowIDCT\n"                                        \
-    BeginIDCT                                           \
-    "\n"                                                \
-    "   movq    "I(2)","r3"\n"  /* r3 = D. */           \
-    "   psubsw  "r7","r4"\n"    /* r4 = E. = E - G */   \
-    "   paddsw  "r1","r1"\n"    /* r1 = H. + H. */      \
-    "   paddsw  "r7","r7"\n"    /* r7 = G + G */        \
-    "   paddsw  "r2","r1"\n"    /* r1 = R1 = A.. + H. */\
-    "   paddsw  "r4","r7"\n"    /* r7 = G. = E + G */   \
-    "   psubsw  "r3","r4"\n"    /* r4 = R4 = E. - D. */ \
-    "   paddsw  "r3","r3"\n"                            \
-    "   psubsw  "r5","r6"\n"    /* r6 = R6 = F. - B.. */\
-    "   paddsw  "r5","r5"\n"                            \
-    "   paddsw  "r4","r3"\n"    /* r3 = R3 = E. + D. */ \
-    "   paddsw  "r6","r5"\n"    /* r5 = R5 = F. + B.. */\
-    "   psubsw  "r0","r7"\n"    /* r7 = R7 = G. - C. */ \
-    "   paddsw  "r0","r0"\n"                            \
-    "   movq    "r1","I(1)"\n"  /* save R1 */           \
-    "   paddsw  "r7","r0"\n"    /* r0 = R0 = G. + C. */ \
-    "#end RowIDCT"                                                                              \
-);
-// end RowIDCT macro (8 + 38 = 46 cycles)
-
-
-/**************************************************************************************
- *
- *      Routine:        ColumnIDCT
- *
- *      Description:    Statement macro: a complete ASM(...) block that runs
- *                      BeginIDCT, finishes the 1-D IDCT on four lanes at
- *                      once, adds the Eight constant and arithmetic-shifts
- *                      every result right by 4 before storing it.
- *
- *      Input:          No macro arguments.  Needs I(n), J(n), C(n), Eight
- *                      and r0..r7 defined at the expansion site.
- *
- *      Output:         The eight shifted results (NR0..NR7 in the comments)
- *                      are stored to I(0)-I(3) and J(4)-J(7).
- *
- *      Return:         None
- *
- *      Special Note:   Eight is added to r2, r4, r6 and r7 before the paired
- *                      result is formed from each, so all eight outputs carry
- *                      the bias.  The expansion already ends with ';'.
- *
- *      Error:          None
- *
- ***************************************************************************************
- */
-// Column IDCT normalizes and stores final results.
-
-#define ColumnIDCT ASM("\n"                                 \
-    "#ColumnIDCT\n"                                         \
-    BeginIDCT                                               \
-    "\n"                                                    \
-    "   paddsw  "Eight","r2"\n"                             \
-    "   paddsw  "r1","r1"\n"        /* r1 = H. + H. */      \
-    "   paddsw  "r2","r1"\n"        /* r1 = R1 = A.. + H. */\
-    "   psraw   ""$4"","r2"\n"      /* r2 = NR2 */          \
-    "   psubsw  "r7","r4"\n"        /* r4 = E. = E - G */   \
-    "   psraw   ""$4"","r1"\n"      /* r1 = NR1 */          \
-    "   movq    "I(2)","r3"\n"  /* r3 = D. */               \
-    "   paddsw  "r7","r7"\n"        /* r7 = G + G */        \
-    "   movq    "r2","I(2)"\n"  /* store NR2 at I2 */       \
-    "   paddsw  "r4","r7"\n"        /* r7 = G. = E + G */   \
-    "   movq    "r1","I(1)"\n"  /* store NR1 at I1 */       \
-    "   psubsw  "r3","r4"\n"        /* r4 = R4 = E. - D. */ \
-    "   paddsw  "Eight","r4"\n"                             \
-    "   paddsw  "r3","r3"\n"        /* r3 = D. + D. */      \
-    "   paddsw  "r4","r3"\n"        /* r3 = R3 = E. + D. */ \
-    "   psraw   ""$4"","r4"\n"      /* r4 = NR4 */          \
-    "   psubsw  "r5","r6"\n"        /* r6 = R6 = F. - B.. */\
-    "   psraw   ""$4"","r3"\n"      /* r3 = NR3 */          \
-    "   paddsw  "Eight","r6"\n"                             \
-    "   paddsw  "r5","r5"\n"        /* r5 = B.. + B.. */    \
-    "   paddsw  "r6","r5"\n"        /* r5 = R5 = F. + B.. */\
-    "   psraw   ""$4"","r6"\n"      /* r6 = NR6 */          \
-    "   movq    "r4","J(4)"\n"  /* store NR4 at J4 */       \
-    "   psraw   ""$4"","r5"\n"      /* r5 = NR5 */          \
-    "   movq    "r3","I(3)"\n"  /* store NR3 at I3 */       \
-    "   psubsw  "r0","r7"\n"        /* r7 = R7 = G. - C. */ \
-    "   paddsw  "Eight","r7"\n"                             \
-    "   paddsw  "r0","r0"\n"        /* r0 = C. + C. */      \
-    "   paddsw  "r7","r0"\n"        /* r0 = R0 = G. + C. */ \
-    "   psraw   ""$4"","r7"\n"      /* r7 = NR7 */          \
-    "   movq    "r6","J(6)"\n"  /* store NR6 at J6 */       \
-    "   psraw   ""$4"","r0"\n"      /* r0 = NR0 */          \
-    "   movq    "r5","J(5)"\n"  /* store NR5 at J5 */       \
-    "   movq    "r7","J(7)"\n"  /* store NR7 at J7 */       \
-    "   movq    "r0","I(0)"\n"  /* store NR0 at I0 */       \
-    "#end ColumnIDCT\n"                                                                         \
-);
-// end ColumnIDCT macro (38 + 19 = 57 cycles)
-
-/**************************************************************************************
- *
- *      Routine:        Transpose
- *
- *      Description:    Statement macro: a complete ASM(...) block that does
- *                      two 4x4 transposes of 16-bit words in place.
- *
- *      Input:          No macro arguments.  Takes seven rows in r0, r2..r7
- *                      and one row in I(1), as listed in the entry table
- *                      below.  Needs I(n), J(n) and r0..r7 defined at the
- *                      expansion site.
- *
- *      Output:         Transposed rows stored to I(0)-I(3) and J(4)-J(7).
- *
- *      Return:         None
- *
- *      Special Note:   I(0) is used as a spill slot for r0 part-way through.
- *                      The expansion already ends with ';'.
- *
- *      Error:          None
- *
- ***************************************************************************************
- */
-
-/* Following macro does two 4x4 transposes in place.
-
-  At entry (we assume):
-
-    r0 = a3 a2 a1 a0
-    I(1) = b3 b2 b1 b0
-    r2 = c3 c2 c1 c0
-    r3 = d3 d2 d1 d0
-
-    r4 = e3 e2 e1 e0
-    r5 = f3 f2 f1 f0
-    r6 = g3 g2 g1 g0
-    r7 = h3 h2 h1 h0
-
-   At exit, we have:
-
-    I(0) = d0 c0 b0 a0
-    I(1) = d1 c1 b1 a1
-    I(2) = d2 c2 b2 a2
-    I(3) = d3 c3 b3 a3
-
-    J(4) = h0 g0 f0 e0
-    J(5) = h1 g1 f1 e1
-    J(6) = h2 g2 f2 e2
-    J(7) = h3 g3 f3 e3
-
-   I(0) I(1) I(2) I(3)  is the transpose of r0 I(1) r2 r3.
-   J(4) J(5) J(6) J(7)  is the transpose of r4 r5 r6 r7.
-
-   Since r1 is free at entry, we calculate the Js first. */
-
-
-#define Transpose ASM("\n#Transpose\n"      \
-                                            \
-    "   movq        "r4","r1"\n"            \
-    "   punpcklwd   "r5","r4"\n"            \
-    "   movq        "r0","I(0)"\n"          \
-    "   punpckhwd   "r5","r1"\n"            \
-    "   movq        "r6","r0"\n"            \
-    "   punpcklwd   "r7","r6"\n"            \
-    "   movq        "r4","r5"\n"            \
-    "   punpckldq   "r6","r4"\n"            \
-    "   punpckhdq   "r6","r5"\n"            \
-    "   movq        "r1","r6"\n"            \
-    "   movq        "r4","J(4)"\n"          \
-    "   punpckhwd   "r7","r0"\n"            \
-    "   movq        "r5","J(5)"\n"          \
-    "   punpckhdq   "r0","r6"\n"            \
-    "   movq        "I(0)","r4"\n"          \
-    "   punpckldq   "r0","r1"\n"            \
-    "   movq        "I(1)","r5"\n"          \
-    "   movq        "r4","r0"\n"            \
-    "   movq        "r6","J(7)"\n"          \
-    "   punpcklwd   "r5","r0"\n"            \
-    "   movq        "r1","J(6)"\n"          \
-    "   punpckhwd   "r5","r4"\n"            \
-    "   movq        "r2","r5"\n"            \
-    "   punpcklwd   "r3","r2"\n"            \
-    "   movq        "r0","r1"\n"            \
-    "   punpckldq   "r2","r0"\n"            \
-    "   punpckhdq   "r2","r1"\n"            \
-    "   movq        "r4","r2"\n"            \
-    "   movq        "r0","I(0)"\n"          \
-    "   punpckhwd   "r3","r5"\n"            \
-    "   movq        "r1","I(1)"\n"          \
-    "   punpckhdq   "r5","r4"\n"            \
-    "   punpckldq   "r5","r2"\n"            \
-                                            \
-    "   movq        "r4","I(3)"\n"          \
-                                            \
-    "   movq        "r2","I(2)"\n"          \
-    "#end Transpose\n"                                          \
-);
-// end Transpose macro (19 cycles).
-
-/*
-static void MMX_dump()
-{
-    ASM
-    ("\
-        movq    %mm0,(%edi)\n\
-        movq    %mm1,8(%edi)\n\
-        movq    %mm2,16(%edi)\n\
-        movq    %mm3,24(%edi)\n\
-        movq    %mm4,32(%edi)\n\
-        movq    %mm5,40(%edi)\n\
-        movq    %mm6,48(%edi)\n\
-        movq    %mm7,56(%edi)\n\
-        ret"
-    );
-}
-*/
-
-/**************************************************************************************
- *
- *      Routine:        MMX_idct
- *
- *      Description:    Perform IDCT on a 8x8 block
- *
- *      Input:          Pointer to input and output buffer
- *
- *      Output:         None
- *
- *      Return:         None
- *
- *      Special Note:   The input coefficients are in ZigZag order
- *
- *      Error:          None
- *
- ***************************************************************************************
- */
-void IDctSlow__mmx(  Q_LIST_ENTRY * InputData,
-                ogg_int16_t *QuantMatrix,
-                ogg_int16_t * OutputData ) {
-
-#   define MIDM(M,I)    MtoSTR(M+I*8(%ecx))
-#   define M(I)         MIDM( MaskOffset , I )
-#   define MIDC(M,I)    MtoSTR(M+(I-1)*8(%ecx))
-#   define C(I)         MIDC( CosineOffset , I )
-#   define MIDEight(M)  MtoSTR(M(%ecx))
-#   define Eight        MIDEight(EightOffset)
-
-#   define r0   "%mm0"
-#   define r1   "%mm1"
-#   define r2   "%mm2"
-#   define r3   "%mm3"
-#   define r4   "%mm4"
-#   define r5   "%mm5"
-#   define r6   "%mm6"
-#   define r7   "%mm7"
-
-    __asm__ __volatile__ (
-    /* eax = quantized input */
-    /* esi = quantization table */
-    /* edx = destination (= idct buffer) */
-    /* ecx = idctconstants */
-    ""
-    :
-    :"a"(InputData), "S"(QuantMatrix), "d"(OutputData), "c"(idctconstants)
-    );
-
-    ASM(
-    "movq   (%eax), "r0"\n"
-    "pmullw (%esi), "r0"\n"     /* r0 = 03 02 01 00 */
-    "movq   16(%eax), "r1"\n"
-    "pmullw 16(%esi), "r1"\n"   /* r1 = 13 12 11 10 */
-    "movq   "M(0)", "r2"\n"     /* r2 = __ __ __ FF */
-    "movq   "r0", "r3"\n"       /* r3 = 03 02 01 00 */
-    "movq   8(%eax), "r4"\n"
-    "psrlq  $16, "r0"\n"        /* r0 = __ 03 02 01 */
-    "pmullw 8(%esi), "r4"\n"    /* r4 = 07 06 05 04 */
-    "pand   "r2", "r3"\n"       /* r3 = __ __ __ 00 */
-    "movq   "r0", "r5"\n"       /* r5 = __ 03 02 01 */
-    "movq   "r1", "r6"\n"       /* r6 = 13 12 11 10 */
-    "pand   "r2", "r5"\n"       /* r5 = __ __ __ 01 */
-    "psllq  $32, "r6"\n"        /* r6 = 11 10 __ __ */
-    "movq   "M(3)", "r7"\n"     /* r7 = FF __ __ __ */
-    "pxor   "r5", "r0"\n"       /* r0 = __ 03 02 __ */
-    "pand   "r6", "r7"\n"       /* r7 = 11 __ __ __ */
-    "por    "r3", "r0"\n"       /* r0 = __ 03 02 00 */
-    "pxor   "r7", "r6"\n"       /* r6 = __ 10 __ __ */
-    "por    "r7", "r0"\n"       /* r0 = 11 03 02 00 = R0 */
-    "movq   "M(3)", "r7"\n"     /* r7 = FF __ __ __ */
-    "movq   "r4", "r3"\n"       /* r3 = 07 06 05 04 */
-    "movq   "r0", (%edx)\n"     /* write R0 = r0 */
-    "pand   "r2", "r3"\n"       /* r3 = __ __ __ 04 */
-    "movq   32(%eax), "r0"\n"
-    "psllq  $16, "r3"\n"        /* r3 = __ __ 04 __ */
-    "pmullw 32(%esi), "r0"\n"   /* r0 = 23 22 21 20 */
-    "pand   "r1", "r7"\n"       /* r7 = 13 __ __ __ */
-    "por    "r3", "r5"\n"       /* r5 = __ __ 04 01 */
-    "por    "r6", "r7"\n"       /* r7 = 13 10 __ __ */
-    "movq   24(%eax), "r3"\n"
-    "por    "r5", "r7"\n"       /* r7 = 13 10 04 01 = R1 */
-    "pmullw 24(%esi), "r3"\n"   /* r3 = 17 16 15 14 */
-    "psrlq  $16, "r4"\n"        /* r4 = __ 07 06 05 */
-    "movq   "r7", 16(%edx)\n"   /* write R1 = r7 */
-    "movq   "r4", "r5"\n"       /* r5 = __ 07 06 05 */
-    "movq   "r0", "r7"\n"       /* r7 = 23 22 21 20 */
-    "psrlq  $16, "r4"\n"        /* r4 = __ __ 07 06 */
-    "psrlq  $48, "r7"\n"        /* r7 = __ __ __ 23 */
-    "movq   "r2", "r6"\n"       /* r6 = __ __ __ FF */
-    "pand   "r2", "r5"\n"       /* r5 = __ __ __ 05 */
-    "pand   "r4", "r6"\n"       /* r6 = __ __ __ 06 */
-    "movq   "r7", 80(%edx)\n"   /* partial R9 = __ __ __ 23 */
-    "pxor   "r6", "r4"\n"       /* r4 = __ __ 07 __ */
-    "psrlq  $32, "r1"\n"        /* r1 = __ __ 13 12 */
-    "por    "r5", "r4"\n"       /* r4 = __ __ 07 05 */
-    "movq   "M(3)", "r7"\n"     /* r7 = FF __ __ __ */
-    "pand   "r2", "r1"\n"       /* r1 = __ __ __ 12 */
-    "movq   48(%eax), "r5"\n"
-    "psllq  $16, "r0"\n"        /* r0 = 22 21 20 __ */
-    "pmullw 48(%esi), "r5"\n"   /* r5 = 33 32 31 30 */
-    "pand   "r0", "r7"\n"       /* r7 = 22 __ __ __ */
-    "movq   "r1", 64(%edx)\n"   /* partial R8 = __ __ __ 12 */
-    "por    "r4", "r7"\n"       /* r7 = 22 __ 07 05 */
-    "movq   "r3", "r4"\n"       /* r4 = 17 16 15 14 */
-    "pand   "r2", "r3"\n"       /* r3 = __ __ __ 14 */
-    "movq   "M(2)", "r1"\n"     /* r1 = __ FF __ __ */
-    "psllq  $32, "r3"\n"        /* r3 = __ 14 __ __ */
-    "por    "r3", "r7"\n"       /* r7 = 22 14 07 05 = R2 */
-    "movq   "r5", "r3"\n"       /* r3 = 33 32 31 30 */
-    "psllq  $48, "r3"\n"        /* r3 = 30 __ __ __ */
-    "pand   "r0", "r1"\n"       /* r1 = __ 21 __ __ */
-    "movq   "r7", 32(%edx)\n"   /* write R2 = r7 */
-    "por    "r3", "r6"\n"       /* r6 = 30 __ __ 06 */
-    "movq   "M(1)", "r7"\n"     /* r7 = __ __ FF __ */
-    "por    "r1", "r6"\n"       /* r6 = 30 21 __ 06 */
-    "movq   56(%eax), "r1"\n"
-    "pand   "r4", "r7"\n"       /* r7 = __ __ 15 __ */
-    "pmullw 56(%esi), "r1"\n"   /* r1 = 37 36 35 34 */
-    "por    "r6", "r7"\n"       /* r7 = 30 21 15 06 = R3 */
-    "pand   "M(1)", "r0"\n"     /* r0 = __ __ 20 __ */
-    "psrlq  $32, "r4"\n"        /* r4 = __ __ 17 16 */
-    "movq   "r7", 48(%edx)\n"   /* write R3 = r7 */
-    "movq   "r4", "r6"\n"       /* r6 = __ __ 17 16 */
-    "movq   "M(3)", "r7"\n"     /* r7 = FF __ __ __ */
-    "pand   "r2", "r4"\n"       /* r4 = __ __ __ 16 */
-    "movq   "M(1)", "r3"\n"     /* r3 = __ __ FF __ */
-    "pand   "r1", "r7"\n"       /* r7 = 37 __ __ __ */
-    "pand   "r5", "r3"\n"       /* r3 = __ __ 31 __ */
-    "por    "r4", "r0"\n"       /* r0 = __ __ 20 16 */
-    "psllq  $16, "r3"\n"        /* r3 = __ 31 __ __ */
-    "por    "r0", "r7"\n"       /* r7 = 37 __ 20 16 */
-    "movq   "M(2)", "r4"\n"     /* r4 = __ FF __ __ */
-    "por    "r3", "r7"\n"       /* r7 = 37 31 20 16 = R4 */
-    "movq   80(%eax), "r0"\n"
-    "movq   "r4", "r3"\n"       /* r3 = __ __ FF __ */
-    "pmullw 80(%esi), "r0"\n"   /* r0 = 53 52 51 50 */
-    "pand   "r5", "r4"\n"       /* r4 = __ 32 __ __ */
-    "movq   "r7", 8(%edx)\n"    /* write R4 = r7 */
-    "por    "r4", "r6"\n"       /* r6 = __ 32 17 16 */
-    "movq   "r3", "r4"\n"       /* r4 = __ FF __ __ */
-    "psrlq  $16, "r6"\n"        /* r6 = __ __ 32 17 */
-    "movq   "r0", "r7"\n"       /* r7 = 53 52 51 50 */
-    "pand   "r1", "r4"\n"       /* r4 = __ 36 __ __ */
-    "psllq  $48, "r7"\n"        /* r7 = 50 __ __ __ */
-    "por    "r4", "r6"\n"       /* r6 = __ 36 32 17 */
-    "movq   88(%eax), "r4"\n"
-    "por    "r6", "r7"\n"       /* r7 = 50 36 32 17 = R5 */
-    "pmullw 88(%esi), "r4"\n"   /* r4 = 57 56 55 54 */
-    "psrlq  $16, "r3"\n"        /* r3 = __ __ FF __ */
-    "movq   "r7", 24(%edx)\n"   /* write R5 = r7 */
-    "pand   "r1", "r3"\n"       /* r3 = __ __ 35 __ */
-    "psrlq  $48, "r5"\n"        /* r5 = __ __ __ 33 */
-    "pand   "r2", "r1"\n"       /* r1 = __ __ __ 34 */
-    "movq   104(%eax), "r6"\n"
-    "por    "r3", "r5"\n"       /* r5 = __ __ 35 33 */
-    "pmullw 104(%esi), "r6"\n"  /* r6 = 67 66 65 64 */
-    "psrlq  $16, "r0"\n"        /* r0 = __ 53 52 51 */
-    "movq   "r4", "r7"\n"       /* r7 = 57 56 55 54 */
-    "movq   "r2", "r3"\n"       /* r3 = __ __ __ FF */
-    "psllq  $48, "r7"\n"        /* r7 = 54 __ __ __ */
-    "pand   "r0", "r3"\n"       /* r3 = __ __ __ 51 */
-    "pxor   "r3", "r0"\n"       /* r0 = __ 53 52 __ */
-    "psllq  $32, "r3"\n"        /* r3 = __ 51 __ __ */
-    "por    "r5", "r7"\n"       /* r7 = 54 __ 35 33 */
-    "movq   "r6", "r5"\n"       /* r5 = 67 66 65 64 */
-    "pand   "M(1)", "r6"\n"     /* r6 = __ __ 65 __ */
-    "por    "r3", "r7"\n"       /* r7 = 54 51 35 33 = R6 */
-    "psllq  $32, "r6"\n"        /* r6 = 65 __ __ __ */
-    "por    "r1", "r0"\n"       /* r0 = __ 53 52 34 */
-    "movq   "r7", 40(%edx)\n"   /* write R6 = r7 */
-    "por    "r6", "r0"\n"       /* r0 = 65 53 52 34 = R7 */
-    "movq   120(%eax), "r7"\n"
-    "movq   "r5", "r6"\n"       /* r6 = 67 66 65 64 */
-    "pmullw 120(%esi), "r7"\n"  /* r7 = 77 76 75 74 */
-    "psrlq  $32, "r5"\n"        /* r5 = __ __ 67 66 */
-    "pand   "r2", "r6"\n"       /* r6 = __ __ __ 64 */
-    "movq   "r5", "r1"\n"       /* r1 = __ __ 67 66 */
-    "movq   "r0", 56(%edx)\n"   /* write R7 = r0 */
-    "pand   "r2", "r1"\n"       /* r1 = __ __ __ 66 */
-    "movq   112(%eax), "r0"\n"
-    "movq   "r7", "r3"\n"       /* r3 = 77 76 75 74 */
-    "pmullw 112(%esi), "r0"\n"  /* r0 = 73 72 71 70 */
-    "psllq  $16, "r3"\n"        /* r3 = 76 75 74 __ */
-    "pand   "M(3)", "r7"\n"     /* r7 = 77 __ __ __ */
-    "pxor   "r1", "r5"\n"       /* r5 = __ __ 67 __ */
-    "por    "r5", "r6"\n"       /* r6 = __ __ 67 64 */
-    "movq   "r3", "r5"\n"       /* r5 = 76 75 74 __ */
-    "pand   "M(3)", "r5"\n"     /* r5 = 76 __ __ __ */
-    "por    "r1", "r7"\n"       /* r7 = 77 __ __ 66 */
-    "movq   96(%eax), "r1"\n"
-    "pxor   "r5", "r3"\n"       /* r3 = __ 75 74 __ */
-    "pmullw 96(%esi), "r1"\n"   /* r1 = 63 62 61 60 */
-    "por    "r3", "r7"\n"       /* r7 = 77 75 74 66 = R15 */
-    "por    "r5", "r6"\n"       /* r6 = 76 __ 67 64 */
-    "movq   "r0", "r5"\n"       /* r5 = 73 72 71 70 */
-    "movq   "r7", 120(%edx)\n"  /* store R15 = r7 */
-    "psrlq  $16, "r5"\n"        /* r5 = __ 73 72 71 */
-    "pand   "M(2)", "r5"\n"     /* r5 = __ 73 __ __ */
-    "movq   "r0", "r7"\n"       /* r7 = 73 72 71 70 */
-    "por    "r5", "r6"\n"       /* r6 = 76 73 67 64 = R14 */
-    "pand   "r2", "r0"\n"       /* r0 = __ __ __ 70 */
-    "pxor   "r0", "r7"\n"       /* r7 = 73 72 71 __ */
-    "psllq  $32, "r0"\n"        /* r0 = __ 70 __ __ */
-    "movq   "r6", 104(%edx)\n"  /* write R14 = r6 */
-    "psrlq  $16, "r4"\n"        /* r4 = __ 57 56 55 */
-    "movq   72(%eax), "r5"\n"
-    "psllq  $16, "r7"\n"        /* r7 = 72 71 __ __ */
-    "pmullw 72(%esi), "r5"\n"   /* r5 = 47 46 45 44 */
-    "movq   "r7", "r6"\n"       /* r6 = 72 71 __ __ */
-    "movq   "M(2)", "r3"\n"     /* r3 = __ FF __ __ */
-    "psllq  $16, "r6"\n"        /* r6 = 71 __ __ __ */
-    "pand   "M(3)", "r7"\n"     /* r7 = 72 __ __ __ */
-    "pand   "r1", "r3"\n"       /* r3 = __ 62 __ __ */
-    "por    "r0", "r7"\n"       /* r7 = 72 70 __ __ */
-    "movq   "r1", "r0"\n"       /* r0 = 63 62 61 60 */
-    "pand   "M(3)", "r1"\n"     /* r1 = 63 __ __ __ */
-    "por    "r3", "r6"\n"       /* r6 = 71 62 __ __ */
-    "movq   "r4", "r3"\n"       /* r3 = __ 57 56 55 */
-    "psrlq  $32, "r1"\n"        /* r1 = __ __ 63 __ */
-    "pand   "r2", "r3"\n"       /* r3 = __ __ __ 55 */
-    "por    "r1", "r7"\n"       /* r7 = 72 70 63 __ */
-    "por    "r3", "r7"\n"       /* r7 = 72 70 63 55 = R13 */
-    "movq   "r4", "r3"\n"       /* r3 = __ 57 56 55 */
-    "pand   "M(1)", "r3"\n"     /* r3 = __ __ 56 __ */
-    "movq   "r5", "r1"\n"       /* r1 = 47 46 45 44 */
-    "movq   "r7", 88(%edx)\n"   /* write R13 = r7 */
-    "psrlq  $48, "r5"\n"        /* r5 = __ __ __ 47 */
-    "movq   64(%eax), "r7"\n"
-    "por    "r3", "r6"\n"       /* r6 = 71 62 56 __ */
-    "pmullw 64(%esi), "r7"\n"   /* r7 = 43 42 41 40 */
-    "por    "r5", "r6"\n"       /* r6 = 71 62 56 47 = R12 */
-    "pand   "M(2)", "r4"\n"     /* r4 = __ 57 __ __ */
-    "psllq  $32, "r0"\n"        /* r0 = 61 60 __ __ */
-    "movq   "r6", 72(%edx)\n"   /* write R12 = r6 */
-    "movq   "r0", "r6"\n"       /* r6 = 61 60 __ __ */
-    "pand   "M(3)", "r0"\n"     /* r0 = 61 __ __ __ */
-    "psllq  $16, "r6"\n"        /* r6 = 60 __ __ __ */
-    "movq   40(%eax), "r5"\n"
-    "movq   "r1", "r3"\n"       /* r3 = 47 46 45 44 */
-    "pmullw 40(%esi), "r5"\n"   /* r5 = 27 26 25 24 */
-    "psrlq  $16, "r1"\n"        /* r1 = __ 47 46 45 */
-    "pand   "M(1)", "r1"\n"     /* r1 = __ __ 46 __ */
-    "por    "r4", "r0"\n"       /* r0 = 61 57 __ __ */
-    "pand   "r7", "r2"\n"       /* r2 = __ __ __ 40 */
-    "por    "r1", "r0"\n"       /* r0 = 61 57 46 __ */
-    "por    "r2", "r0"\n"       /* r0 = 61 57 46 40 = R11 */
-    "psllq  $16, "r3"\n"        /* r3 = 46 45 44 __ */
-    "movq   "r3", "r4"\n"       /* r4 = 46 45 44 __ */
-    "movq   "r5", "r2"\n"       /* r2 = 27 26 25 24 */
-    "movq   "r0", 112(%edx)\n"  /* write R11 = r0 */
-    "psrlq  $48, "r2"\n"        /* r2 = __ __ __ 27 */
-    "pand   "M(2)", "r4"\n"     /* r4 = __ 45 __ __ */
-    "por    "r2", "r6"\n"       /* r6 = 60 __ __ 27 */
-    "movq   "M(1)", "r2"\n"     /* r2 = __ __ FF __ */
-    "por    "r4", "r6"\n"       /* r6 = 60 45 __ 27 */
-    "pand   "r7", "r2"\n"       /* r2 = __ __ 41 __ */
-    "psllq  $32, "r3"\n"        /* r3 = 44 __ __ __ */
-    "por    80(%edx), "r3"\n"   /* r3 = 44 __ __ 23 */
-    "por    "r2", "r6"\n"       /* r6 = 60 45 41 27 = R10 */
-    "movq   "M(3)", "r2"\n"     /* r2 = FF __ __ __ */
-    "psllq  $16, "r5"\n"        /* r5 = 26 25 24 __ */
-    "movq   "r6", 96(%edx)\n"   /* store R10 = r6 */
-    "pand   "r5", "r2"\n"       /* r2 = 26 __ __ __ */
-    "movq   "M(2)", "r6"\n"     /* r6 = __ FF __ __ */
-    "pxor   "r2", "r5"\n"       /* r5 = __ 25 24 __ */
-    "pand   "r7", "r6"\n"       /* r6 = __ 42 __ __ */
-    "psrlq  $32, "r2"\n"        /* r2 = __ __ 26 __ */
-    "pand   "M(3)", "r7"\n"     /* r7 = 43 __ __ __ */
-    "por    "r2", "r3"\n"       /* r3 = 44 __ 26 23 */
-    "por    64(%edx), "r7"\n"   /* r7 = 43 __ __ 12 */
-    "por    "r3", "r6"\n"       /* r6 = 44 42 26 23 = R9 */
-    "por    "r5", "r7"\n"       /* r7 = 43 25 24 12 = R8 */
-    "movq   "r6", 80(%edx)\n"   /* store R9 = r6 */
-    "movq   "r7", 64(%edx)\n"   /* store R8 = r7 */
-    );
-    /* 123c  ( / 64 coeffs  < 2c / coeff) */
-#   undef M
-
-/* Done w/dequant + descramble + partial transpose; now do the idct itself. */
-
-#   define I( K)    MtoSTR(K*16(%edx))
-#   define J( K)    MtoSTR(((K - 4)*16)+8(%edx))
-
-    RowIDCT         /* 46 c */
-    Transpose       /* 19 c */
-
-#   undef I
-#   undef J
-#   define I( K)    MtoSTR((K*16)+64(%edx))
-#   define J( K)    MtoSTR(((K-4)*16)+72(%edx))
-
-    RowIDCT         /* 46 c */
-    Transpose       /* 19 c */
-
-#   undef I
-#   undef J
-#   define I( K)    MtoSTR((K * 16)(%edx))
-#   define J( K)    I( K)
-
-    ColumnIDCT      /* 57 c */
-
-#   undef I
-#   undef J
-#   define I( K)    MtoSTR((K*16)+8(%edx))
-#   define J( K)    I( K)
-
-    ColumnIDCT      /* 57 c */
-
-#   undef I
-#   undef J
-    /* 368 cycles  ( / 64 coeff  <  6 c / coeff) */
-
-    ASM("emms\n");
-}
-
-/**************************************************************************************
- *
- *      Routine:        IDct10__mmx
- *
- *      Description:    Perform IDCT on an 8x8 block with at most 10 nonzero coefficients
- *
- *      Input:          Pointers to the quantized input, the quantization matrix and the output buffer
- *
- *      Output:         None
- *
- *      Return:         None
- *
- *      Special Note:   The input coefficients are in transposed ZigZag order
- *
- *      Error:          None
- *
- ***************************************************************************************
- */
-/* --------------------------------------------------------------- */
-// BeginIDCT_10 runs four 4-input one-dimensional IDCTs in parallel (one per
-// 16-bit lane); inputs 4 thru 7 are assumed zero.  I(1) and I(2) are overwritten.
-#define BeginIDCT_10 "#BeginIDCT_10\n"  \
-    "   movq    "I(3)","r2"\n"          \
-                                        \
-    "   movq    "C(3)","r6"\n"          \
-    "   movq    "r2","r4"\n"            \
-                                        \
-    "   movq    "C(5)","r1"\n"          \
-    "   pmulhw  "r6","r4"\n"            \
-                                        \
-    "   movq    "I(1)","r3"\n"          \
-    "   pmulhw  "r2","r1"\n"            \
-                                        \
-    "   movq    "C(1)","r0"\n"          \
-    "   paddw   "r2","r4"\n"            \
-                                        \
-    "   pxor    "r6","r6"\n"            \
-    "   paddw   "r1","r2"\n"            \
-                                        \
-    "   movq    "I(2)","r5"\n"          \
-    "   pmulhw  "r3","r0"\n"            \
-                                        \
-    "   movq    "r5","r1"\n"            \
-    "   paddw   "r3","r0"\n"            \
-                                        \
-    "   pmulhw  "C(7)","r3"\n"          \
-    "   psubsw  "r2","r6"\n"            \
-                                        \
-    "   pmulhw  "C(2)","r5"\n"          \
-    "   psubsw  "r4","r0"\n"            \
-                                        \
-    "   movq    "I(2)","r7"\n"          \
-    "   paddsw  "r4","r4"\n"            \
-                                        \
-    "   paddw   "r5","r7"\n"            \
-    "   paddsw  "r0","r4"\n"            \
-                                        \
-    "   pmulhw  "C(6)","r1"\n"          \
-    "   psubsw  "r6","r3"\n"            \
-                                        \
-    "   movq    "r4","I(1)"\n"          \
-    "   paddsw  "r6","r6"\n"            \
-                                        \
-    "   movq    "C(4)","r4"\n"          \
-    "   paddsw  "r3","r6"\n"            \
-                                        \
-    "   movq    "r3","r5"\n"            \
-    "   pmulhw  "r4","r3"\n"            \
-                                        \
-    "   movq    "r6","I(2)"\n"          \
-    "   movq    "r0","r2"\n"            \
-                                        \
-    "   movq    "I(0)","r6"\n"          \
-    "   pmulhw  "r4","r0"\n"            \
-                                        \
-    "   paddw   "r3","r5"\n"            \
-    "   paddw   "r0","r2"\n"            \
-                                        \
-    "   psubsw  "r1","r5"\n"            \
-    "   pmulhw  "r4","r6"\n"            \
-                                        \
-    "   paddw   "I(0)","r6"\n"          \
-    "   paddsw  "r1","r1"\n"            \
-                                        \
-    "   movq    "r6","r4"\n"            \
-    "   paddsw  "r5","r1"\n"            \
-                                        \
-    "   psubsw  "r2","r6"\n"            \
-    "   paddsw  "r2","r2"\n"            \
-                                        \
-    "   movq    "I(1)","r0"\n"          \
-    "   paddsw  "r6","r2"\n"            \
-                                        \
-    "   psubsw  "r1","r2"\n"            \
-    "#end BeginIDCT_10\n"
-// end BeginIDCT_10 macro (25 cycles).
-
-#define RowIDCT_10 ASM("\n"                                 \
-    "#RowIDCT_10\n"                                         \
-    BeginIDCT_10                                            \
-    "\n"                                                    \
-    "   movq    "I(2)","r3"\n"  /* r3 = D. */               \
-    "   psubsw  "r7","r4"\n"        /* r4 = E. = E - G */   \
-    "   paddsw  "r1","r1"\n"        /* r1 = H. + H. */      \
-    "   paddsw  "r7","r7"\n"        /* r7 = G + G */        \
-    "   paddsw  "r2","r1"\n"        /* r1 = R1 = A.. + H. */\
-    "   paddsw  "r4","r7"\n"        /* r7 = G. = E + G */   \
-    "   psubsw  "r3","r4"\n"        /* r4 = R4 = E. - D. */ \
-    "   paddsw  "r3","r3"\n"                                \
-    "   psubsw  "r5","r6"\n"        /* r6 = R6 = F. - B.. */\
-    "   paddsw  "r5","r5"\n"                                \
-    "   paddsw  "r4","r3"\n"        /* r3 = R3 = E. + D. */ \
-    "   paddsw  "r6","r5"\n"        /* r5 = R5 = F. + B.. */\
-    "   psubsw  "r0","r7"\n"        /* r7 = R7 = G. - C. */ \
-    "   paddsw  "r0","r0"\n"                                \
-    "   movq    "r1","I(1)"\n"  /* save R1 */               \
-    "   paddsw  "r7","r0"\n"        /* r0 = R0 = G. + C. */ \
-    "#end RowIDCT_10\n"                                                                         \
-);
-// end RowIDCT_10 macro (the "8 + 38 = 46 cycles" figure here matches the full RowIDCT; the call site notes 33 c)
-
-// ColumnIDCT_10 rounds (adds Eight), shifts right by 4 and stores the final results.
-
-#define ColumnIDCT_10 ASM("\n"                          \
-    "#ColumnIDCT_10\n"                                  \
-    BeginIDCT_10                                        \
-    "\n"                                                \
-    "   paddsw  "Eight","r2"\n"                         \
-    "   paddsw  "r1","r1"\n"    /* r1 = H. + H. */      \
-    "   paddsw  "r2","r1"\n"    /* r1 = R1 = A.. + H. */\
-    "   psraw   ""$4"","r2"\n"      /* r2 = NR2 */      \
-    "   psubsw  "r7","r4"\n"    /* r4 = E. = E - G */   \
-    "   psraw   ""$4"","r1"\n"      /* r1 = NR1 */      \
-    "   movq    "I(2)","r3"\n"  /* r3 = D. */           \
-    "   paddsw  "r7","r7"\n"    /* r7 = G + G */        \
-    "   movq    "r2","I(2)"\n"  /* store NR2 at I2 */   \
-    "   paddsw  "r4","r7"\n"    /* r7 = G. = E + G */   \
-    "   movq    "r1","I(1)"\n"  /* store NR1 at I1 */   \
-    "   psubsw  "r3","r4"\n"    /* r4 = R4 = E. - D. */ \
-    "   paddsw  "Eight","r4"\n"                         \
-    "   paddsw  "r3","r3"\n"    /* r3 = D. + D. */      \
-    "   paddsw  "r4","r3"\n"    /* r3 = R3 = E. + D. */ \
-    "   psraw   ""$4"","r4"\n"      /* r4 = NR4 */      \
-    "   psubsw  "r5","r6"\n"    /* r6 = R6 = F. - B.. */\
-    "   psraw   ""$4"","r3"\n"      /* r3 = NR3 */      \
-    "   paddsw  "Eight","r6"\n"                         \
-    "   paddsw  "r5","r5"\n"    /* r5 = B.. + B.. */    \
-    "   paddsw  "r6","r5"\n"    /* r5 = R5 = F. + B.. */\
-    "   psraw   ""$4"","r6"\n"      /* r6 = NR6 */      \
-    "   movq    "r4","J(4)"\n"  /* store NR4 at J4 */   \
-    "   psraw   ""$4"","r5"\n"      /* r5 = NR5 */      \
-    "   movq    "r3","I(3)"\n"  /* store NR3 at I3 */   \
-    "   psubsw  "r0","r7"\n"    /* r7 = R7 = G. - C. */ \
-    "   paddsw  "Eight","r7"\n"                         \
-    "   paddsw  "r0","r0"\n"    /* r0 = C. + C. */      \
-    "   paddsw  "r7","r0"\n"    /* r0 = R0 = G. + C. */ \
-    "   psraw   ""$4"","r7"\n"      /* r7 = NR7 */      \
-    "   movq    "r6","J(6)"\n"  /* store NR6 at J6 */   \
-    "   psraw   ""$4"","r0"\n"      /* r0 = NR0 */      \
-    "   movq    "r5","J(5)"\n"  /* store NR5 at J5 */   \
-                                                        \
-    "   movq    "r7","J(7)"\n"  /* store NR7 at J7 */   \
-                                                        \
-    "   movq    "r0","I(0)"\n"  /* store NR0 at I0 */   \
-    "#end ColumnIDCT_10\n"                                                              \
-);
-// end ColumnIDCT_10 macro (the "38 + 19 = 57 cycles" figure matches the full ColumnIDCT; the call sites note 44 c)
-/* --------------------------------------------------------------- */
-
-
-/* --------------------------------------------------------------- */
-/* IDct10__mmx: dequantize and descramble the first coefficients, then run RowIDCT_10 and two ColumnIDCT_10 passes */
-void IDct10__mmx( Q_LIST_ENTRY * InputData,
-             ogg_int16_t *QuantMatrix,
-             ogg_int16_t * OutputData ) {
-
-#   define MIDM(M,I)    MtoSTR(M+I*8(%ecx))
-#   define M(I)         MIDM( MaskOffset , I )
-#   define MIDC(M,I)    MtoSTR(M+(I-1)*8(%ecx))
-#   define C(I)         MIDC( CosineOffset , I )
-#   define MIDEight(M)  MtoSTR(M(%ecx))
-#   define Eight        MIDEight(EightOffset)
-
-#   define r0   "%mm0"
-#   define r1   "%mm1"
-#   define r2   "%mm2"
-#   define r3   "%mm3"
-#   define r4   "%mm4"
-#   define r5   "%mm5"
-#   define r6   "%mm6"
-#   define r7   "%mm7"
-
-    __asm__ __volatile__ (
-    /* eax = quantized input */
-    /* esi = quantization table */
-    /* edx = destination (= idct buffer) */
-    /* ecx = idctconstants; NOTE(review): the ASM() blocks below rely on these registers staying live -- confirm */
-    ""
-    :
-    :"a"(InputData), "S"(QuantMatrix), "d"(OutputData), "c"(idctconstants)
-    );
-
-    ASM(
-    "movq   (%eax), "r0"\n"
-    "pmullw (%esi), "r0"\n"     /* r0 = 03 02 01 00 */
-    "movq   16(%eax), "r1"\n"
-    "pmullw 16(%esi), "r1"\n"   /* r1 = 13 12 11 10 */
-    "movq   "M(0)", "r2"\n"     /* r2 = __ __ __ FF */
-    "movq   "r0", "r3"\n"       /* r3 = 03 02 01 00 */
-    "movq   8(%eax), "r4"\n"
-    "psrlq  $16, "r0"\n"        /* r0 = __ 03 02 01 */
-    "pmullw 8(%esi), "r4"\n"    /* r4 = 07 06 05 04 */
-    "pand   "r2", "r3"\n"       /* r3 = __ __ __ 00 */
-    "movq   "r0", "r5"\n"       /* r5 = __ 03 02 01 */
-    "pand   "r2", "r5"\n"       /* r5 = __ __ __ 01 */
-    "psllq  $32, "r1"\n"        /* r1 = 11 10 __ __ */
-    "movq   "M(3)", "r7"\n"     /* r7 = FF __ __ __ */
-    "pxor   "r5", "r0"\n"       /* r0 = __ 03 02 __ */
-    "pand   "r1", "r7"\n"       /* r7 = 11 __ __ __ */
-    "por    "r3", "r0"\n"       /* r0 = __ 03 02 00 */
-    "pxor   "r7", "r1"\n"       /* r1 = __ 10 __ __ */
-    "por    "r7", "r0"\n"       /* r0 = 11 03 02 00 = R0 */
-    "movq   "r4", "r3"\n"       /* r3 = 07 06 05 04 */
-    "movq   "r0", (%edx)\n"     /* write R0 = r0 */
-    "pand   "r2", "r3"\n"       /* r3 = __ __ __ 04 */
-    "psllq  $16, "r3"\n"        /* r3 = __ __ 04 __ */
-    "por    "r3", "r5"\n"       /* r5 = __ __ 04 01 */
-    "por    "r5", "r1"\n"       /* r1 = __ 10 04 01 = R1 */
-    "psrlq  $16, "r4"\n"        /* r4 = __ 07 06 05 */
-    "movq   "r1", 16(%edx)\n"   /* write R1 = r1 */
-    "movq   "r4", "r5"\n"       /* r5 = __ 07 06 05 */
-    "psrlq  $16, "r4"\n"        /* r4 = __ __ 07 06 */
-    "movq   "r2", "r6"\n"       /* r6 = __ __ __ FF */
-    "pand   "r2", "r5"\n"       /* r5 = __ __ __ 05 */
-    "pand   "r4", "r6"\n"       /* r6 = __ __ __ 06 */
-    "pxor   "r6", "r4"\n"       /* r4 = __ __ 07 __ */
-    "por    "r5", "r4"\n"       /* r4 = __ __ 07 05 */
-    "movq   "r4", 32(%edx)\n"   /* write R2 = r4 */
-    "movq   "r6", 48(%edx)\n"   /* write R3 = r6 */
-    );
-#   undef M
-
-/* Done w/dequant + descramble + partial transpose; now do the idct itself. */
-
-#   define I( K)    MtoSTR((K*16)(%edx))
-#   define J( K)    MtoSTR(((K - 4) * 16)+8(%edx))
-
-    RowIDCT_10      /* 33 c */
-    Transpose       /* 19 c */
-
-#   undef I
-#   undef J
-//# define I( K)    [edx + (  K      * 16) + 64]
-//# define J( K)    [edx + ( (K - 4) * 16) + 72]
-
-//  RowIDCT         ; 46 c
-//  Transpose       ; 19 c
-
-//# undef I
-//# undef J
-#   define I( K)    MtoSTR((K * 16)(%edx))
-#   define J( K)    I( K)
-
-    ColumnIDCT_10       /* 44 c */
-
-#   undef I
-#   undef J
-#   define I( K)    MtoSTR((K * 16)+8(%edx))
-#   define J( K)    I( K)
-
-    ColumnIDCT_10       /* 44 c */
-
-#   undef I
-#   undef J
-
-    ASM("emms\n");
-}
-
-/**************************************************************************************
- *
- *      Routine:        IDct3__mmx
- *
- *      Description:    Perform IDCT on an 8x8 block with at most 3 nonzero coefficients
- *
- *      Input:          Pointers to the quantized input, the quantization matrix and the output buffer
- *
- *      Output:         None
- *
- *      Return:         None
- *
- *      Special Note:   Only works for three nonzero coefficients.
- *
- *      Error:          None
- *
- ***************************************************************************************
- */
-/***************************************************************************************
-    In IDCT 3, we are dealing with only three non-zero coefficients in the 8x8 block.
-    When we work in the order RowIDCT -> ColumnIDCT, we only have to do 1-D row
-    IDCTs on the first two rows; the remaining six rows stay zero anyway.
-    After the row IDCTs, every column could have nonzero coefficients, so we need to do
-    eight 1-D column IDCTs. However, each column has at most two nonzero
-    coefficients, coefficient 0 and coefficient 1. The same holds for the coefficients of the
-    two 1-D row IDCTs. For this reason, the process of a 1-D IDCT is simplified
-
-    from a full version:
-
-    A = (C1 * I1) + (C7 * I7)       B = (C7 * I1) - (C1 * I7)
-    C = (C3 * I3) + (C5 * I5)       D = (C3 * I5) - (C5 * I3)
-    A. = C4 * (A - C)               B. = C4 * (B - D)
-    C. = A + C                      D. = B + D
-
-    E = C4 * (I0 + I4)              F = C4 * (I0 - I4)
-    G = (C2 * I2) + (C6 * I6)       H = (C6 * I2) - (C2 * I6)
-    E. = E - G
-    G. = E + G
-
-    A.. = F + A.                    B.. = B. - H
-    F.  = F - A.                    H.  = B. + H
-
-    R0 = G. + C.    R1 = A.. + H.   R3 = E. + D.    R5 = F. + B..
-    R7 = G. - C.    R2 = A.. - H.   R4 = E. - D.    R6 = F. - B..
-
-    To:
-
-
-    A = (C1 * I1)                   B = (C7 * I1)
-    C = 0                           D = 0
-    A. = C4 * A                     B. = C4 * B
-    C. = A                          D. = B
-
-    E = C4 * I0                     F = E
-    G = 0                           H = 0
-    E. = E
-    G. = E
-
-    A.. = E + A.                    B.. = B.
-    F.  = E - A.                    H.  = B.
-
-    R0 = E + A      R1 = E + A. + B.    R3 = E + B      R5 = E - A. + B.
-    R7 = E - A      R2 = E + A. - B.    R4 = E - B      R6 = E - A. - B.
-
-******************************************************************************************/
-
-#define RowIDCT_3 ASM("\n"\
-    "#RowIDCT_3\n"\
-    "   movq        "I(1)","r7"\n"  /* r7 = I1                      */  \
-    "   movq        "C(1)","r0"\n"  /* r0 = C1                      */  \
-    "   movq        "C(7)","r3"\n"  /* r3 = C7                      */  \
-    "   pmulhw      "r7","r0"\n"    /* r0 = C1 * I1 - I1            */  \
-    "   pmulhw      "r7","r3"\n"    /* r3 = C7 * I1 = B, D.         */  \
-    "   movq        "I(0)","r6"\n"  /* r6 = I0                      */  \
-    "   movq        "C(4)","r4"\n"  /* r4 = C4                      */  \
-    "   paddw       "r7","r0"\n"    /* r0 = C1 * I1 = A, C.         */  \
-    "   movq        "r6","r1"\n"    /* make a copy of I0            */  \
-    "   pmulhw      "r4","r6"\n"    /* r6 = C4 * I0 - I0            */  \
-    "   movq        "r0","r2"\n"    /* make a copy of A             */  \
-    "   movq        "r3","r5"\n"    /* make a copy of B             */  \
-    "   pmulhw      "r4","r2"\n"    /* r2 = C4 * A - A              */  \
-    "   pmulhw      "r4","r5"\n"    /* r5 = C4 * B - B              */  \
-    "   paddw       "r1","r6"\n"    /* r6 = C4 * I0 = E, F          */  \
-    "   movq        "r6","r4"\n"    /* r4 = E                       */  \
-    "   paddw       "r0","r2"\n"    /* r2 = A.                      */  \
-    "   paddw       "r3","r5"\n"    /* r5 = B.                      */  \
-    "   movq        "r6","r7"\n"    /* r7 = E                       */  \
-    "   movq        "r5","r1"\n"    /* r1 = B.                      */  \
-    /*  r0 = A      */   \
-    /*  r3 = B      */   \
-    /*  r2 = A.     */   \
-    /*  r5 = B.     */   \
-    /*  r6 = E      */   \
-    /*  r4 = E      */   \
-    /*  r7 = E      */   \
-    /*  r1 = B.     */   \
-    "   psubw       "r2","r6"\n"    /* r6 = E - A.                  */  \
-    "   psubw       "r3","r4"\n"    /* r4 = E - B ----R4            */  \
-    "   psubw       "r0","r7"\n"    /* r7 = E - A ----R7            */  \
-    "   paddw       "r2","r2"\n"    /* r2 = A. + A.                 */  \
-    "   paddw       "r3","r3"\n"    /* r3 = B + B                   */  \
-    "   paddw       "r0","r0"\n"    /* r0 = A + A                   */  \
-    "   paddw       "r6","r2"\n"    /* r2 = E + A.                  */  \
-    "   paddw       "r4","r3"\n"    /* r3 = E + B ----R3            */  \
-    "   psubw       "r1","r2"\n"    /* r2 = E + A. - B. ----R2      */  \
-    "   psubw       "r5","r6"\n"    /* r6 = E - A. - B. ----R6      */  \
-    "   paddw       "r1","r1"\n"    /* r1 = B. + B.                 */  \
-    "   paddw       "r5","r5"\n"    /* r5 = B. + B.                 */  \
-    "   paddw       "r7","r0"\n"    /* r0 = E + A ----R0            */  \
-    "   paddw       "r2","r1"\n"    /* r1 = E + A. + B. -----R1     */  \
-    "   movq        "r1","I(1)"\n"  /* save r1 (R1) to I1           */  \
-    "   paddw       "r6","r5"\n"    /* r5 = E - A. + B. -----R5     */  \
-    "#end RowIDCT_3\n"\
-);
-//End of RowIDCT_3
-
-#define ColumnIDCT_3 ASM("\n"\
-    "#ColumnIDCT_3\n"\
-    "   movq        "I(1)","r7"\n"  /* r7 = I1                      */  \
-    "   movq        "C(1)","r0"\n"  /* r0 = C1                      */  \
-    "   movq        "C(7)","r3"\n"  /* r3 = C7                      */  \
-    "   pmulhw      "r7","r0"\n"    /* r0 = C1 * I1 - I1            */  \
-    "   pmulhw      "r7","r3"\n"    /* r3 = C7 * I1 = B, D.         */  \
-    "   movq        "I(0)","r6"\n"  /* r6 = I0                      */  \
-    "   movq        "C(4)","r4"\n"  /* r4 = C4                      */  \
-    "   paddw       "r7","r0"\n"    /* r0 = C1 * I1 = A, C.         */  \
-    "   movq        "r6","r1"\n"    /* make a copy of I0            */  \
-    "   pmulhw      "r4","r6"\n"    /* r6 = C4 * I0 - I0            */  \
-    "   movq        "r0","r2"\n"    /* make a copy of A             */  \
-    "   movq        "r3","r5"\n"    /* make a copy of B             */  \
-    "   pmulhw      "r4","r2"\n"    /* r2 = C4 * A - A              */  \
-    "   pmulhw      "r4","r5"\n"    /* r5 = C4 * B - B              */  \
-    "   paddw       "r1","r6"\n"    /* r6 = C4 * I0 = E, F          */  \
-    "   movq        "r6","r4"\n"    /* r4 = E                       */  \
-    "   paddw       "Eight","r6"\n" /* +8 for shift                 */  \
-    "   paddw       "Eight","r4"\n" /* +8 for shift                 */  \
-    "   paddw       "r0","r2"\n"    /* r2 = A.                      */  \
-    "   paddw       "r3","r5"\n"    /* r5 = B.                      */  \
-    "   movq        "r6","r7"\n"    /* r7 = E (already includes +8) */  \
-    "   movq        "r5","r1"\n"    /* r1 = B.                      */  \
-/*  r0 = A      */   \
-/*  r3 = B      */   \
-/*  r2 = A.     */   \
-/*  r5 = B.     */   \
-/*  r6 = E      */   \
-/*  r4 = E      */   \
-/*  r7 = E      */   \
-/*  r1 = B.     */   \
-    "   psubw       "r2","r6"\n"    /* r6 = E - A.                  */  \
-    "   psubw       "r3","r4"\n"    /* r4 = E - B ----R4            */  \
-    "   psubw       "r0","r7"\n"    /* r7 = E - A ----R7            */  \
-    "   paddw       "r2","r2"\n"    /* r2 = A. + A.                 */  \
-    "   paddw       "r3","r3"\n"    /* r3 = B + B                   */  \
-    "   paddw       "r0","r0"\n"    /* r0 = A + A                   */  \
-    "   paddw       "r6","r2"\n"    /* r2 = E + A.                  */  \
-    "   paddw       "r4","r3"\n"    /* r3 = E + B ----R3            */  \
-    "   psraw        $4,"r4"\n"     /* shift                        */  \
-    "   movq        "r4","J(4)"\n"  /* store R4 at J4               */  \
-    "   psraw       $4,"r3"\n"      /* shift                        */  \
-    "   movq        "r3","I(3)"\n"  /* store R3 at I3               */  \
-    "   psubw       "r1","r2"\n"    /* r2 = E + A. - B. ----R2      */  \
-    "   psubw       "r5","r6"\n"    /* r6 = E - A. - B. ----R6      */  \
-    "   paddw       "r1","r1"\n"    /* r1 = B. + B.                 */  \
-    "   paddw       "r5","r5"\n"    /* r5 = B. + B.                 */  \
-    "   paddw       "r7","r0"\n"    /* r0 = E + A ----R0            */  \
-    "   paddw       "r2","r1"\n"    /* r1 = E + A. + B. -----R1     */  \
-    "   psraw       $4,"r7"\n"      /* shift                        */  \
-    "   psraw       $4,"r2"\n"      /* shift                        */  \
-    "   psraw       $4,"r0"\n"      /* shift                        */  \
-    "   psraw       $4,"r1"\n"      /* shift                        */  \
-    "   movq        "r7","J(7)"\n"  /* store R7 to J7               */  \
-    "   movq        "r0","I(0)"\n"  /* store R0 to I0               */  \
-    "   movq        "r1","I(1)"\n"  /* store R1 to I1               */  \
-    "   movq        "r2","I(2)"\n"  /* store R2 to I2               */  \
-    "   movq        "r1","I(1)"\n"  /* redundant: R1 already stored */  \
-    "   paddw       "r6","r5"\n"    /* r5 = E - A. + B. -----R5     */  \
-    "   psraw       $4,"r5"\n"      /* shift                        */  \
-    "   movq        "r5","J(5)"\n"  /* store R5 at J5               */  \
-    "   psraw       $4,"r6"\n"      /* shift                        */  \
-    "   movq        "r6","J(6)"\n"  /* store R6 at J6               */  \
-    "#end ColumnIDCT_3\n"\
-);
-//End of ColumnIDCT_3
-
-void IDct3__mmx( Q_LIST_ENTRY * InputData,
-            ogg_int16_t *QuantMatrix,
-            ogg_int16_t * OutputData ) {
-
-#   define MIDM(M,I)    MtoSTR(M+I*8(%ecx))
-#   define M(I)         MIDM( MaskOffset , I )
-#   define MIDC(M,I)    MtoSTR(M+(I-1)*8(%ecx))
-#   define C(I)         MIDC( CosineOffset , I )
-#   define MIDEight(M)  MtoSTR(M(%ecx))
-#   define Eight        MIDEight(EightOffset)
-
-#   define r0   "%mm0"
-#   define r1   "%mm1"
-#   define r2   "%mm2"
-#   define r3   "%mm3"
-#   define r4   "%mm4"
-#   define r5   "%mm5"
-#   define r6   "%mm6"
-#   define r7   "%mm7"
-
-    __asm__ __volatile__ (
-    /* eax = quantized input */
-    /* esi = quantization table */
-    /* edx = destination (= idct buffer) */
-    /* ecx = idctconstants; NOTE(review): the ASM() blocks below rely on these registers staying live -- confirm */
-    ""
-    :
-    :"a"(InputData), "S"(QuantMatrix), "d"(OutputData), "c"(idctconstants)
-    );
-
-    ASM(
-    "movq   (%eax), "r0"\n"
-    "pmullw (%esi), "r0"\n"     /* r0 = 03 02 01 00 */
-    "movq   "M(0)", "r2"\n"     /* r2 = __ __ __ FF */
-    "movq   "r0", "r3"\n"       /* r3 = 03 02 01 00 */
-    "psrlq  $16, "r0"\n"        /* r0 = __ 03 02 01 */
-    "pand   "r2", "r3"\n"       /* r3 = __ __ __ 00 */
-    "movq   "r0", "r5"\n"       /* r5 = __ 03 02 01 */
-    "pand   "r2", "r5"\n"       /* r5 = __ __ __ 01 */
-    "pxor   "r5", "r0"\n"       /* r0 = __ 03 02 __ */
-    "por    "r3", "r0"\n"       /* r0 = __ 03 02 00 */
-    "movq   "r0", (%edx)\n"     /* write R0 = r0 */
-    "movq   "r5", 16(%edx)\n"   /* write R1 = r5 */
-    );
-#   undef M
-
-/* Done w/dequant (pmullw by the quantization table) + partial transpose; now do the idct itself. */
-
-#   define I( K)    MtoSTR(K*16(%edx))
-#   define J( K)    MtoSTR(((K - 4)*16)+8(%edx))
-
-    RowIDCT_3       /* 33 c */
-    Transpose       /* 19 c */
-
-#   undef I
-#   undef J
-//# define I( K)    [edx + (  K      * 16) + 64]
-//# define J( K)    [edx + ( (K - 4) * 16) + 72]
-
-//  RowIDCT         ; 46 c
-//  Transpose       ; 19 c
-
-//# undef I
-//# undef J
-#   define I( K)    MtoSTR((K * 16)(%edx))
-#   define J( K)    I( K)
-
-    ColumnIDCT_3    /* 44 c */
-
-#   undef I
-#   undef J
-#   define I( K)    MtoSTR((K*16)+8(%edx))
-#   define J( K)    I( K)
-
-    ColumnIDCT_3    /* 44 c */
-
-#   undef I
-#   undef J
-
-    ASM("emms\n");
-}
-
-
-/* Install the MMX IDCT routines (IDctSlow, IDct10 and IDct3 entries) into the given function table. */
-void dsp_mmx_idct_init(DspFunctions *funcs)
-{
-  funcs->IDctSlow = IDctSlow__mmx;
-  funcs->IDct10 = IDct10__mmx;
-  funcs->IDct3 = IDct3__mmx;
-}
-
-#endif /* USE_ASM */

+ 0 - 182
Engine/lib/libtheora/lib/enc/x86_32/recon_mmx.c

@@ -1,182 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: recon_mmx.c 15153 2008-08-04 18:37:55Z tterribe $
-
- ********************************************************************/
-
-#include "../codec_internal.h"
-
-#if defined(USE_ASM)
-
-static const __attribute__ ((aligned(8),used)) ogg_int64_t V128 = 0x8080808080808080LL; /* 0x80 in every byte; XORed into packed bytes by recon_intra8x8__mmx */
-
-/* Copy an 8x8 block of bytes from src to dest.
- *
- * src, dest: first byte of the top row of each block.
- * stride:    distance in bytes between consecutive rows, used for both
- *            the source and the destination.
- *
- * The block is moved as two groups of four 8-byte rows.  Between the
- * groups the asm advances both pointers by 4*stride, so both are declared
- * as read-write ("+") operands: GCC requires input-only operands to be
- * left unmodified, and the original listed src as input-only.
- * edi holds 3*stride and is declared clobbered.  No emms is issued here.
- */
-static void copy8x8__mmx (unsigned char *src,
-                        unsigned char *dest,
-                        unsigned int stride)
-{
-  __asm__ __volatile__ (
-    "  .p2align 4                      \n\t"
-
-    "  lea         (%2, %2, 2), %%edi  \n\t" /* edi = 3 * stride */
-
-    "  movq        (%1), %%mm0         \n\t" /* load rows 0..3 */
-    "  movq        (%1, %2), %%mm1     \n\t"
-    "  movq        (%1, %2, 2), %%mm2  \n\t"
-    "  movq        (%1, %%edi), %%mm3  \n\t"
-
-    "  lea         (%1, %2, 4), %1     \n\t" /* src += 4 * stride */
-
-    "  movq        %%mm0, (%0)         \n\t" /* store rows 0..3 */
-    "  movq        %%mm1, (%0, %2)     \n\t"
-    "  movq        %%mm2, (%0, %2, 2)  \n\t"
-    "  movq        %%mm3, (%0, %%edi)  \n\t"
-
-    "  lea         (%0, %2, 4), %0     \n\t" /* dest += 4 * stride */
-
-    "  movq        (%1), %%mm0         \n\t" /* load rows 4..7 */
-    "  movq        (%1, %2), %%mm1     \n\t"
-    "  movq        (%1, %2, 2), %%mm2  \n\t"
-    "  movq        (%1, %%edi), %%mm3  \n\t"
-
-    "  movq        %%mm0, (%0)         \n\t" /* store rows 4..7 */
-    "  movq        %%mm1, (%0, %2)     \n\t"
-    "  movq        %%mm2, (%0, %2, 2)  \n\t"
-    "  movq        %%mm3, (%0, %%edi)  \n\t"
-      : "+a" (dest),
-        "+c" (src)
-      : "d" (stride)
-      : "memory", "edi"
-  );
-}
-
-/* Reconstruct an intra-coded 8x8 block.
- *
- * ReconPtr:  destination; 8 bytes are written per row and the pointer is
- *            advanced by LineStep bytes after each row.
- * ChangePtr: 64 consecutive 16-bit values (128 bytes), 8 per row.
- * LineStep:  destination row pitch in bytes.
- *
- * Each row is packed to bytes with signed saturation (packsswb) and then
- * XORed with 0x80 in every byte, which the original comment describes as
- * adding 128.
- *
- * The loop advances ChangePtr inside the asm, so it is declared as a
- * read-write ("+r") operand: GCC requires input-only operands to be left
- * unmodified, and the original listed it as input-only.  "cc" is listed
- * because the loop compare changes the flags.  edi holds the end address
- * and is declared clobbered.  No emms is issued here.
- */
-static void recon_intra8x8__mmx (unsigned char *ReconPtr, ogg_int16_t *ChangePtr,
-                      ogg_uint32_t LineStep)
-{
-  __asm__ __volatile__ (
-    "  .p2align 4                      \n\t"
-
-    "  movq        %[V128], %%mm0      \n\t" /* Set mm0 to 0x8080808080808080 */
-
-    "  lea         128(%1), %%edi      \n\t" /* Endpoint in input buffer */
-    "1:                                \n\t"
-    "  movq         (%1), %%mm2        \n\t" /* First four input values */
-
-    "  packsswb    8(%1), %%mm2        \n\t" /* pack with next(high) four values */
-    "  por         %%mm0, %%mm0        \n\t" /* leaves mm0 unchanged */
-    "  pxor        %%mm0, %%mm2        \n\t" /* Convert result to unsigned (same as add 128) */
-    "  lea         16(%1), %1          \n\t" /* Step source buffer */
-    "  cmp         %%edi, %1           \n\t" /* are we done */
-
-    "  movq        %%mm2, (%0)         \n\t" /* store results */
-
-    "  lea         (%0, %2), %0        \n\t" /* Step output buffer */
-    "  jc          1b                  \n\t" /* Loop back if we are not done */
-      : "+r" (ReconPtr),
-        "+r" (ChangePtr)
-      : "r" (LineStep),
-        [V128] "m" (V128)
-      : "memory", "cc", "edi"
-  );
-}
-
-/* Reconstruct an inter-coded 8x8 block from one reference block.
- *
- * ReconPtr:  destination; 8 bytes are written per row.
- * RefPtr:    reference pixels; 8 bytes are read per row.
- * ChangePtr: 64 consecutive 16-bit values (128 bytes), 8 per row.
- * LineStep:  row pitch in bytes, applied to both ReconPtr and RefPtr.
- *
- * Per row: the reference bytes are zero-extended to 16 bits, the changes
- * are added with signed saturation (paddsw), and the sums are packed back
- * to bytes with unsigned saturation (packuswb).
- *
- * The loop advances ChangePtr and RefPtr inside the asm, so both are
- * declared as read-write ("+r") operands: GCC requires input-only operands
- * to be left unmodified, and the original listed them as input-only.
- * "cc" is listed because add/cmp change the flags.  edi holds the end
- * address and is declared clobbered.  No emms is issued here.
- */
-static void recon_inter8x8__mmx (unsigned char *ReconPtr, unsigned char *RefPtr,
-                      ogg_int16_t *ChangePtr, ogg_uint32_t LineStep)
-{
-  __asm__ __volatile__ (
-    "  .p2align 4                      \n\t"
-
-    "  pxor        %%mm0, %%mm0        \n\t" /* mm0 = 0, used for zero extension */
-    "  lea         128(%1), %%edi      \n\t" /* end of the change buffer */
-
-    "1:                                \n\t"
-    "  movq        (%2), %%mm2         \n\t" /* (+3 misaligned) 8 reference pixels */
-
-    "  movq        (%1), %%mm4         \n\t" /* first 4 changes */
-    "  movq        %%mm2, %%mm3        \n\t"
-    "  movq        8(%1), %%mm5        \n\t" /* last 4 changes */
-    "  punpcklbw   %%mm0, %%mm2        \n\t" /* turn first 4 refs into positive 16-bit #s */
-    "  paddsw      %%mm4, %%mm2        \n\t" /* add in first 4 changes */
-    "  punpckhbw   %%mm0, %%mm3        \n\t" /* turn last 4 refs into positive 16-bit #s */
-    "  paddsw      %%mm5, %%mm3        \n\t" /* add in last 4 changes */
-    "  add         %3, %2              \n\t" /* next row of reference pixels */
-    "  packuswb    %%mm3, %%mm2        \n\t" /* pack result to unsigned 8-bit values */
-    "  lea         16(%1), %1          \n\t" /* next row of changes */
-    "  cmp         %%edi, %1           \n\t" /* are we done? */
-
-    "  movq        %%mm2, (%0)         \n\t" /* store result */
-
-    "  lea         (%0, %3), %0        \n\t" /* next row of output */
-    "  jc          1b                  \n\t"
-      : "+r" (ReconPtr),
-        "+r" (ChangePtr),
-        "+r" (RefPtr)
-      : "r" (LineStep)
-      : "memory", "cc", "edi"
-  );
-}
-
-/* Reconstruct an inter-coded 8x8 block from the average of two reference
- * blocks.
- *
- * ReconPtr:         destination; 8 bytes are written per row.
- * RefPtr1, RefPtr2: the two reference blocks; 8 bytes are read per row
- *                   from each.
- * ChangePtr:        64 consecutive 16-bit values (128 bytes), 8 per row.
- * LineStep:         row pitch in bytes, applied to ReconPtr, RefPtr1 and
- *                   RefPtr2.
- *
- * Per row: both references are zero-extended to 16 bits, summed and
- * shifted right by one, the changes are added (paddw, no saturation), and
- * the result is packed back to bytes with unsigned saturation (packuswb).
- *
- * The loop advances ChangePtr, RefPtr1 and RefPtr2 inside the asm, so all
- * three are declared as read-write ("+r") operands: GCC requires
- * input-only operands to be left unmodified, and the original listed them
- * as input-only.  "cc" is listed because add/cmp change the flags.  edi
- * holds the end address and is declared clobbered.  No emms is issued here.
- */
-static void recon_inter8x8_half__mmx (unsigned char *ReconPtr, unsigned char *RefPtr1,
-                           unsigned char *RefPtr2, ogg_int16_t *ChangePtr,
-                           ogg_uint32_t LineStep)
-{
-  __asm__ __volatile__ (
-    "  .p2align 4                      \n\t"
-
-    "  pxor        %%mm0, %%mm0        \n\t" /* mm0 = 0, used for zero extension */
-    "  lea         128(%1), %%edi      \n\t" /* end of the change buffer */
-
-    "1:                                \n\t"
-    "  movq        (%2), %%mm2         \n\t" /* (+3 misaligned) 8 reference pixels */
-    "  movq        (%3), %%mm4         \n\t" /* (+3 misaligned) 8 reference pixels */
-
-    "  movq        %%mm2, %%mm3        \n\t"
-    "  punpcklbw   %%mm0, %%mm2        \n\t" /* mm2 = start ref1 as positive 16-bit #s */
-    "  movq        %%mm4, %%mm5        \n\t"
-    "  movq        (%1), %%mm6         \n\t" /* first 4 changes */
-    "  punpckhbw   %%mm0, %%mm3        \n\t" /* mm3 = end ref1 as positive 16-bit #s */
-    "  movq        8(%1), %%mm7        \n\t" /* last 4 changes */
-    "  punpcklbw   %%mm0, %%mm4        \n\t" /* mm4 = start ref2 as positive 16-bit #s */
-    "  punpckhbw   %%mm0, %%mm5        \n\t" /* mm5 = end ref2 as positive 16-bit #s */
-    "  paddw       %%mm4, %%mm2        \n\t" /* mm2 = start (ref1 + ref2) */
-    "  paddw       %%mm5, %%mm3        \n\t" /* mm3 = end (ref1 + ref2) */
-    "  psrlw       $1, %%mm2           \n\t" /* mm2 = start (ref1 + ref2)/2 */
-    "  psrlw       $1, %%mm3           \n\t" /* mm3 = end (ref1 + ref2)/2 */
-    "  paddw       %%mm6, %%mm2        \n\t" /* add changes to start */
-    "  paddw       %%mm7, %%mm3        \n\t" /* add changes to end */
-    "  lea         16(%1), %1          \n\t" /* next row of changes */
-    "  packuswb    %%mm3, %%mm2        \n\t" /* pack start|end to unsigned 8-bit */
-    "  add         %4, %2              \n\t" /* next row of reference pixels */
-    "  add         %4, %3              \n\t" /* next row of reference pixels */
-    "  movq        %%mm2, (%0)         \n\t" /* store result */
-    "  add         %4, %0              \n\t" /* next row of output */
-    "  cmp         %%edi, %1           \n\t" /* are we done? */
-    "  jc          1b                  \n\t"
-      : "+r" (ReconPtr),
-        "+r" (ChangePtr),
-        "+r" (RefPtr1),
-        "+r" (RefPtr2)
-      : "m" (LineStep)
-      : "memory", "cc", "edi"
-  );
-}
-
-/* Point the block-copy and reconstruction entries of the caller's function
-   table at the MMX implementations defined in this file. */
-void dsp_mmx_recon_init(DspFunctions *funcs)
-{
-  funcs->recon_inter8x8_half = recon_inter8x8_half__mmx;
-  funcs->recon_inter8x8      = recon_inter8x8__mmx;
-  funcs->recon_intra8x8      = recon_intra8x8__mmx;
-  funcs->copy8x8             = copy8x8__mmx;
-}
-
-#endif /* USE_ASM */

+ 0 - 1605
Engine/lib/libtheora/lib/enc/x86_32_vs/dsp_mmx.c

@@ -1,1605 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: mcomp.c,v 1.8 2003/12/03 08:59:41 arc Exp $
-
- ********************************************************************/
-
-#include <stdlib.h>
-
-#include "../codec_internal.h"
-#include "../dsp.h"
-
-#if 0
-//Helper macros for the C reference versions that the functions below keep under their own '#if 0' (e.g. DSP_OP_DIFF in sub8x8__mmx); enable this block together with any of those.
-#define DSP_OP_AVG(a,b) ((((int)(a)) + ((int)(b)))/2)
-#define DSP_OP_DIFF(a,b) (((int)(a)) - ((int)(b)))
-#define DSP_OP_ABS_DIFF(a,b) abs((((int)(a)) - ((int)(b))))
-#endif
-
-
-static const ogg_int64_t V128 = 0x0080008000800080; /* 128 in each of the four 16-bit lanes; subtracted in sub8x8_128__mmx */
-
-static void sub8x8__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr,
-                  ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine,
-                  ogg_uint32_t ReconPixelsPerLine)
-{
-    /* Compute the 8x8 difference block DctInputPtr = FiltPtr - ReconPtr.
-
-       FiltPtr:            source pixels; rows are PixelsPerLine bytes apart.
-       ReconPtr:           pixels to subtract; rows are ReconPixelsPerLine
-                           bytes apart.
-       DctInputPtr:        receives 64 consecutive 16-bit differences,
-                           8 per row (16 bytes per row).
-
-       The MMX path is unrolled by hand into eight identical row steps.
-       It uses eax, ebx, edx and mm0-mm3/mm7, and does not execute emms. */
-    //Change the '#if 0' below to non-zero to use the C version (it needs the DSP_OP_DIFF macro, which this file only defines inside its own '#if 0' block)
-#if 0
-  int i;
-
-  /* For each block row */
-  for (i=8; i; i--) {
-    DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], ReconPtr[0]);
-    DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], ReconPtr[1]);
-    DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], ReconPtr[2]);
-    DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], ReconPtr[3]);
-    DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], ReconPtr[4]);
-    DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], ReconPtr[5]);
-    DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], ReconPtr[6]);
-    DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], ReconPtr[7]);
-
-    /* Start next row */
-    FiltPtr += PixelsPerLine;
-    ReconPtr += ReconPixelsPerLine;
-    DctInputPtr += 8;
-  }
-#else
-    __asm {
-        align 16
-
-        pxor    mm7, mm7    /* mm7 = 0, supplies the high byte when widening */
-
-        mov     eax, FiltPtr
-        mov     ebx, ReconPtr
-        mov     edx, DctInputPtr
-
-     /* You can't use rept in inline masm and macro parsing seems screwed with inline asm*/
-
-     /* ITERATION 1 */
-        movq    mm0, [eax]    /* mm0 = 8 bytes of the FiltPtr row */
-        movq    mm1, [ebx]    /* mm1 = 8 bytes of the ReconPtr row */
-        movq    mm2, mm0    /* dup to prepare for up conversion */
-        movq    mm3, mm1    /* dup to prepare for up conversion */
-        /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    /* mm0 = INT16(FiltPtr[0..3]) */
-        punpcklbw   mm1, mm7    /* mm1 = INT16(ReconPtr[0..3]) */
-        punpckhbw   mm2, mm7    /* mm2 = INT16(FiltPtr[4..7]) */
-        punpckhbw   mm3, mm7    /* mm3 = INT16(ReconPtr[4..7]) */
-        /* start calculation */
-        psubw   mm0, mm1    /* mm0 = FiltPtr[0..3] - ReconPtr[0..3] */
-        psubw   mm2, mm3    /* mm2 = FiltPtr[4..7] - ReconPtr[4..7] */
-        movq    [edx], mm0    /* write differences 0..3 */
-        movq    [8 + edx], mm2    /* write differences 4..7 */
-        /* Increment pointers */
-        add   edx, 16
-        add   eax, PixelsPerLine
-        add   ebx, ReconPixelsPerLine
-
-
-     /* ITERATION 2 */
-        movq    mm0, [eax]    /* mm0 = 8 bytes of the FiltPtr row */
-        movq    mm1, [ebx]    /* mm1 = 8 bytes of the ReconPtr row */
-        movq    mm2, mm0    /* dup to prepare for up conversion */
-        movq    mm3, mm1    /* dup to prepare for up conversion */
-        /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    /* mm0 = INT16(FiltPtr[0..3]) */
-        punpcklbw   mm1, mm7    /* mm1 = INT16(ReconPtr[0..3]) */
-        punpckhbw   mm2, mm7    /* mm2 = INT16(FiltPtr[4..7]) */
-        punpckhbw   mm3, mm7    /* mm3 = INT16(ReconPtr[4..7]) */
-        /* start calculation */
-        psubw   mm0, mm1    /* mm0 = FiltPtr[0..3] - ReconPtr[0..3] */
-        psubw   mm2, mm3    /* mm2 = FiltPtr[4..7] - ReconPtr[4..7] */
-        movq    [edx], mm0    /* write differences 0..3 */
-        movq    [8 + edx], mm2    /* write differences 4..7 */
-        /* Increment pointers */
-        add   edx, 16
-        add   eax, PixelsPerLine
-        add   ebx, ReconPixelsPerLine
-
-
-     /* ITERATION 3 */
-        movq    mm0, [eax]    /* mm0 = 8 bytes of the FiltPtr row */
-        movq    mm1, [ebx]    /* mm1 = 8 bytes of the ReconPtr row */
-        movq    mm2, mm0    /* dup to prepare for up conversion */
-        movq    mm3, mm1    /* dup to prepare for up conversion */
-        /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    /* mm0 = INT16(FiltPtr[0..3]) */
-        punpcklbw   mm1, mm7    /* mm1 = INT16(ReconPtr[0..3]) */
-        punpckhbw   mm2, mm7    /* mm2 = INT16(FiltPtr[4..7]) */
-        punpckhbw   mm3, mm7    /* mm3 = INT16(ReconPtr[4..7]) */
-        /* start calculation */
-        psubw   mm0, mm1    /* mm0 = FiltPtr[0..3] - ReconPtr[0..3] */
-        psubw   mm2, mm3    /* mm2 = FiltPtr[4..7] - ReconPtr[4..7] */
-        movq    [edx], mm0    /* write differences 0..3 */
-        movq    [8 + edx], mm2    /* write differences 4..7 */
-        /* Increment pointers */
-        add   edx, 16
-        add   eax, PixelsPerLine
-        add   ebx, ReconPixelsPerLine
-
-
-     /* ITERATION 4 */
-        movq    mm0, [eax]    /* mm0 = 8 bytes of the FiltPtr row */
-        movq    mm1, [ebx]    /* mm1 = 8 bytes of the ReconPtr row */
-        movq    mm2, mm0    /* dup to prepare for up conversion */
-        movq    mm3, mm1    /* dup to prepare for up conversion */
-        /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    /* mm0 = INT16(FiltPtr[0..3]) */
-        punpcklbw   mm1, mm7    /* mm1 = INT16(ReconPtr[0..3]) */
-        punpckhbw   mm2, mm7    /* mm2 = INT16(FiltPtr[4..7]) */
-        punpckhbw   mm3, mm7    /* mm3 = INT16(ReconPtr[4..7]) */
-        /* start calculation */
-        psubw   mm0, mm1    /* mm0 = FiltPtr[0..3] - ReconPtr[0..3] */
-        psubw   mm2, mm3    /* mm2 = FiltPtr[4..7] - ReconPtr[4..7] */
-        movq    [edx], mm0    /* write differences 0..3 */
-        movq    [8 + edx], mm2    /* write differences 4..7 */
-        /* Increment pointers */
-        add   edx, 16
-        add   eax, PixelsPerLine
-        add   ebx, ReconPixelsPerLine
-
-
-     /* ITERATION 5 */
-        movq    mm0, [eax]    /* mm0 = 8 bytes of the FiltPtr row */
-        movq    mm1, [ebx]    /* mm1 = 8 bytes of the ReconPtr row */
-        movq    mm2, mm0    /* dup to prepare for up conversion */
-        movq    mm3, mm1    /* dup to prepare for up conversion */
-        /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    /* mm0 = INT16(FiltPtr[0..3]) */
-        punpcklbw   mm1, mm7    /* mm1 = INT16(ReconPtr[0..3]) */
-        punpckhbw   mm2, mm7    /* mm2 = INT16(FiltPtr[4..7]) */
-        punpckhbw   mm3, mm7    /* mm3 = INT16(ReconPtr[4..7]) */
-        /* start calculation */
-        psubw   mm0, mm1    /* mm0 = FiltPtr[0..3] - ReconPtr[0..3] */
-        psubw   mm2, mm3    /* mm2 = FiltPtr[4..7] - ReconPtr[4..7] */
-        movq    [edx], mm0    /* write differences 0..3 */
-        movq    [8 + edx], mm2    /* write differences 4..7 */
-        /* Increment pointers */
-        add   edx, 16
-        add   eax, PixelsPerLine
-        add   ebx, ReconPixelsPerLine
-
-
-     /* ITERATION 6 */
-        movq    mm0, [eax]    /* mm0 = 8 bytes of the FiltPtr row */
-        movq    mm1, [ebx]    /* mm1 = 8 bytes of the ReconPtr row */
-        movq    mm2, mm0    /* dup to prepare for up conversion */
-        movq    mm3, mm1    /* dup to prepare for up conversion */
-        /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    /* mm0 = INT16(FiltPtr[0..3]) */
-        punpcklbw   mm1, mm7    /* mm1 = INT16(ReconPtr[0..3]) */
-        punpckhbw   mm2, mm7    /* mm2 = INT16(FiltPtr[4..7]) */
-        punpckhbw   mm3, mm7    /* mm3 = INT16(ReconPtr[4..7]) */
-        /* start calculation */
-        psubw   mm0, mm1    /* mm0 = FiltPtr[0..3] - ReconPtr[0..3] */
-        psubw   mm2, mm3    /* mm2 = FiltPtr[4..7] - ReconPtr[4..7] */
-        movq    [edx], mm0    /* write differences 0..3 */
-        movq    [8 + edx], mm2    /* write differences 4..7 */
-        /* Increment pointers */
-        add   edx, 16
-        add   eax, PixelsPerLine
-        add   ebx, ReconPixelsPerLine
-
-
-     /* ITERATION 7 */
-        movq    mm0, [eax]    /* mm0 = 8 bytes of the FiltPtr row */
-        movq    mm1, [ebx]    /* mm1 = 8 bytes of the ReconPtr row */
-        movq    mm2, mm0    /* dup to prepare for up conversion */
-        movq    mm3, mm1    /* dup to prepare for up conversion */
-        /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    /* mm0 = INT16(FiltPtr[0..3]) */
-        punpcklbw   mm1, mm7    /* mm1 = INT16(ReconPtr[0..3]) */
-        punpckhbw   mm2, mm7    /* mm2 = INT16(FiltPtr[4..7]) */
-        punpckhbw   mm3, mm7    /* mm3 = INT16(ReconPtr[4..7]) */
-        /* start calculation */
-        psubw   mm0, mm1    /* mm0 = FiltPtr[0..3] - ReconPtr[0..3] */
-        psubw   mm2, mm3    /* mm2 = FiltPtr[4..7] - ReconPtr[4..7] */
-        movq    [edx], mm0    /* write differences 0..3 */
-        movq    [8 + edx], mm2    /* write differences 4..7 */
-        /* Increment pointers */
-        add   edx, 16
-        add   eax, PixelsPerLine
-        add   ebx, ReconPixelsPerLine
-
-
-     /* ITERATION 8 */
-        movq    mm0, [eax]    /* mm0 = 8 bytes of the FiltPtr row */
-        movq    mm1, [ebx]    /* mm1 = 8 bytes of the ReconPtr row */
-        movq    mm2, mm0    /* dup to prepare for up conversion */
-        movq    mm3, mm1    /* dup to prepare for up conversion */
-        /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    /* mm0 = INT16(FiltPtr[0..3]) */
-        punpcklbw   mm1, mm7    /* mm1 = INT16(ReconPtr[0..3]) */
-        punpckhbw   mm2, mm7    /* mm2 = INT16(FiltPtr[4..7]) */
-        punpckhbw   mm3, mm7    /* mm3 = INT16(ReconPtr[4..7]) */
-        /* start calculation */
-        psubw   mm0, mm1    /* mm0 = FiltPtr[0..3] - ReconPtr[0..3] */
-        psubw   mm2, mm3    /* mm2 = FiltPtr[4..7] - ReconPtr[4..7] */
-        movq    [edx], mm0    /* write differences 0..3 */
-        movq    [8 + edx], mm2    /* write differences 4..7 */
-        /* Increment pointers (the results are not read again after this last row) */
-        add   edx, 16
-        add   eax, PixelsPerLine
-        add   ebx, ReconPixelsPerLine
-
-
-
-
-
-    };
-
-#endif
-}
-
-static void sub8x8_128__mmx (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr,
-                      ogg_uint32_t PixelsPerLine)
-{
-    /* Compute the 8x8 block DctInputPtr = FiltPtr - 128.
-
-       FiltPtr:       source pixels; rows are PixelsPerLine bytes apart.
-       DctInputPtr:   receives 64 consecutive 16-bit results, 8 per row
-                      (16 bytes per row).
-
-       The MMX path is unrolled by hand into eight identical row steps.
-       It uses eax, ebx and mm0-mm2/mm7, and does not execute emms.
-       The C version under '#if 0' needs the DSP_OP_DIFF macro, which this
-       file only defines inside its own '#if 0' block. */
-#if 0
-  int i;
-  /* For each block row */
-  for (i=8; i; i--) {
-    /* INTRA mode so code raw image data */
-    /* We convert the data to 8 bit signed (by subtracting 128) as
-       this reduces the internal precision requirements in the DCT
-       transform. */
-    DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], 128);
-    DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], 128);
-    DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], 128);
-    DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], 128);
-    DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], 128);
-    DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], 128);
-    DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], 128);
-    DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], 128);
-
-    /* Start next row */
-    FiltPtr += PixelsPerLine;
-    DctInputPtr += 8;
-  }
-
-#else
-    __asm {
-        align 16
-
-        pxor    mm7, mm7    /* mm7 = 0, supplies the high byte when widening */
-
-        mov         eax, FiltPtr
-        mov         ebx, DctInputPtr
-
-        movq    mm1, V128    /* mm1 = 128 in each 16-bit lane */
-
-        /*  ITERATION 1 */
-        movq    mm0, [eax]    /* mm0 = 8 bytes of the FiltPtr row */
-        movq    mm2, mm0    /* dup to prepare for up conversion */
-        /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    /* mm0 = INT16(FiltPtr[0..3]) */
-        punpckhbw   mm2, mm7    /* mm2 = INT16(FiltPtr[4..7]) */
-        /* start calculation */
-        psubw   mm0, mm1    /* mm0 = FiltPtr[0..3] - 128 */
-        psubw   mm2, mm1    /* mm2 = FiltPtr[4..7] - 128 */
-        movq    [ebx], mm0    /* write results 0..3 */
-        movq    [8 + ebx], mm2    /* write results 4..7 */
-        /* Increment pointers */
-        add   ebx, 16
-        add   eax, PixelsPerLine
-
-
-        /*  ITERATION 2 */
-        movq    mm0, [eax]    /* mm0 = 8 bytes of the FiltPtr row */
-        movq    mm2, mm0    /* dup to prepare for up conversion */
-        /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    /* mm0 = INT16(FiltPtr[0..3]) */
-        punpckhbw   mm2, mm7    /* mm2 = INT16(FiltPtr[4..7]) */
-        /* start calculation */
-        psubw   mm0, mm1    /* mm0 = FiltPtr[0..3] - 128 */
-        psubw   mm2, mm1    /* mm2 = FiltPtr[4..7] - 128 */
-        movq    [ebx], mm0    /* write results 0..3 */
-        movq    [8 + ebx], mm2    /* write results 4..7 */
-        /* Increment pointers */
-        add   ebx, 16
-        add   eax, PixelsPerLine
-
-
-        /*  ITERATION 3 */
-        movq    mm0, [eax]    /* mm0 = 8 bytes of the FiltPtr row */
-        movq    mm2, mm0    /* dup to prepare for up conversion */
-        /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    /* mm0 = INT16(FiltPtr[0..3]) */
-        punpckhbw   mm2, mm7    /* mm2 = INT16(FiltPtr[4..7]) */
-        /* start calculation */
-        psubw   mm0, mm1    /* mm0 = FiltPtr[0..3] - 128 */
-        psubw   mm2, mm1    /* mm2 = FiltPtr[4..7] - 128 */
-        movq    [ebx], mm0    /* write results 0..3 */
-        movq    [8 + ebx], mm2    /* write results 4..7 */
-        /* Increment pointers */
-        add   ebx, 16
-        add   eax, PixelsPerLine
-
-
-        /*  ITERATION 4 */
-        movq    mm0, [eax]    /* mm0 = 8 bytes of the FiltPtr row */
-        movq    mm2, mm0    /* dup to prepare for up conversion */
-        /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    /* mm0 = INT16(FiltPtr[0..3]) */
-        punpckhbw   mm2, mm7    /* mm2 = INT16(FiltPtr[4..7]) */
-        /* start calculation */
-        psubw   mm0, mm1    /* mm0 = FiltPtr[0..3] - 128 */
-        psubw   mm2, mm1    /* mm2 = FiltPtr[4..7] - 128 */
-        movq    [ebx], mm0    /* write results 0..3 */
-        movq    [8 + ebx], mm2    /* write results 4..7 */
-        /* Increment pointers */
-        add   ebx, 16
-        add   eax, PixelsPerLine
-
-
-        /*  ITERATION 5 */
-        movq    mm0, [eax]    /* mm0 = 8 bytes of the FiltPtr row */
-        movq    mm2, mm0    /* dup to prepare for up conversion */
-        /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    /* mm0 = INT16(FiltPtr[0..3]) */
-        punpckhbw   mm2, mm7    /* mm2 = INT16(FiltPtr[4..7]) */
-        /* start calculation */
-        psubw   mm0, mm1    /* mm0 = FiltPtr[0..3] - 128 */
-        psubw   mm2, mm1    /* mm2 = FiltPtr[4..7] - 128 */
-        movq    [ebx], mm0    /* write results 0..3 */
-        movq    [8 + ebx], mm2    /* write results 4..7 */
-        /* Increment pointers */
-        add   ebx, 16
-        add   eax, PixelsPerLine
-
-
-        /*  ITERATION 6 */
-        movq    mm0, [eax]    /* mm0 = 8 bytes of the FiltPtr row */
-        movq    mm2, mm0    /* dup to prepare for up conversion */
-        /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    /* mm0 = INT16(FiltPtr[0..3]) */
-        punpckhbw   mm2, mm7    /* mm2 = INT16(FiltPtr[4..7]) */
-        /* start calculation */
-        psubw   mm0, mm1    /* mm0 = FiltPtr[0..3] - 128 */
-        psubw   mm2, mm1    /* mm2 = FiltPtr[4..7] - 128 */
-        movq    [ebx], mm0    /* write results 0..3 */
-        movq    [8 + ebx], mm2    /* write results 4..7 */
-        /* Increment pointers */
-        add   ebx, 16
-        add   eax, PixelsPerLine
-
-
-        /*  ITERATION 7 */
-        movq    mm0, [eax]    /* mm0 = 8 bytes of the FiltPtr row */
-        movq    mm2, mm0    /* dup to prepare for up conversion */
-        /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    /* mm0 = INT16(FiltPtr[0..3]) */
-        punpckhbw   mm2, mm7    /* mm2 = INT16(FiltPtr[4..7]) */
-        /* start calculation */
-        psubw   mm0, mm1    /* mm0 = FiltPtr[0..3] - 128 */
-        psubw   mm2, mm1    /* mm2 = FiltPtr[4..7] - 128 */
-        movq    [ebx], mm0    /* write results 0..3 */
-        movq    [8 + ebx], mm2    /* write results 4..7 */
-        /* Increment pointers */
-        add   ebx, 16
-        add   eax, PixelsPerLine
-
-
-        /*  ITERATION 8 */
-        movq    mm0, [eax]    /* mm0 = 8 bytes of the FiltPtr row */
-        movq    mm2, mm0    /* dup to prepare for up conversion */
-        /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    /* mm0 = INT16(FiltPtr[0..3]) */
-        punpckhbw   mm2, mm7    /* mm2 = INT16(FiltPtr[4..7]) */
-        /* start calculation */
-        psubw   mm0, mm1    /* mm0 = FiltPtr[0..3] - 128 */
-        psubw   mm2, mm1    /* mm2 = FiltPtr[4..7] - 128 */
-        movq    [ebx], mm0    /* write results 0..3 */
-        movq    [8 + ebx], mm2    /* write results 4..7 */
-        /* Increment pointers (the results are not read again after this last row) */
-        add   ebx, 16
-        add   eax, PixelsPerLine
-
-    };
-
-#endif
-}
-
-
-
-
-static void sub8x8avg2__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr1,
-                     unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr,
-                     ogg_uint32_t PixelsPerLine,
-                     ogg_uint32_t ReconPixelsPerLine)
-{
-
-#if 0
-  int i;
-
-  /* For each block row */
-  for (i=8; i; i--) {
-    DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], DSP_OP_AVG (ReconPtr1[0], ReconPtr2[0]));
-    DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], DSP_OP_AVG (ReconPtr1[1], ReconPtr2[1]));
-    DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], DSP_OP_AVG (ReconPtr1[2], ReconPtr2[2]));
-    DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], DSP_OP_AVG (ReconPtr1[3], ReconPtr2[3]));
-    DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], DSP_OP_AVG (ReconPtr1[4], ReconPtr2[4]));
-    DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], DSP_OP_AVG (ReconPtr1[5], ReconPtr2[5]));
-    DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], DSP_OP_AVG (ReconPtr1[6], ReconPtr2[6]));
-    DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], DSP_OP_AVG (ReconPtr1[7], ReconPtr2[7]));
-
-    /* Start next row */
-    FiltPtr += PixelsPerLine;
-    ReconPtr1 += ReconPixelsPerLine;
-    ReconPtr2 += ReconPixelsPerLine;
-    DctInputPtr += 8;
-  }
-#else
-
-    __asm {
-        align 16
-
-            pxor        mm7, mm7
-
-        mov         eax, FiltPtr
-        mov         ebx, ReconPtr1
-        mov         ecx, ReconPtr2
-        mov         edx, DctInputPtr
-
-        /*  ITERATION 1 */
-        movq    mm0, [eax]    ; /* mm0 = FiltPtr */
-        movq    mm1, [ebx]    ; /* mm1 = ReconPtr1 */
-        movq    mm4, [ecx]    ; /* mm1 = ReconPtr2 */
-        movq    mm2, mm0    ; /* dup to prepare for up conversion */
-        movq    mm3, mm1    ; /* dup to prepare for up conversion */
-        movq    mm5, mm4    ; /* dup to prepare for up conversion */
-          ; /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    ; /* mm0 = INT16(FiltPtr) */
-        punpcklbw   mm1, mm7    ; /* mm1 = INT16(ReconPtr1) */
-        punpcklbw   mm4, mm7    ; /* mm1 = INT16(ReconPtr2) */
-        punpckhbw   mm2, mm7    ; /* mm2 = INT16(FiltPtr) */
-        punpckhbw   mm3, mm7    ; /* mm3 = INT16(ReconPtr1) */
-        punpckhbw   mm5, mm7    ; /* mm3 = INT16(ReconPtr2) */
-          ; /* average ReconPtr1 and ReconPtr2 */
-        paddw   mm1, mm4    ; /* mm1 = ReconPtr1 + ReconPtr2 */
-        paddw   mm3, mm5    ; /* mm3 = ReconPtr1 + ReconPtr2 */
-        psrlw   mm1, 1    ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
-        psrlw   mm3, 1    ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
-        psubw   mm0, mm1    ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
-        psubw   mm2, mm3    ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
-        movq    [edx], mm0    ; /* write answer out */
-        movq    [8 + edx], mm2    ; /* write answer out */
-          ; /* Increment pointers */
-        add   edx, 16   ;
-        add   eax, PixelsPerLine    ;
-        add   ebx, ReconPixelsPerLine   ;
-        add   ecx, ReconPixelsPerLine   ;
-
-
-        /*  ITERATION 2 */
-        movq    mm0, [eax]    ; /* mm0 = FiltPtr */
-        movq    mm1, [ebx]    ; /* mm1 = ReconPtr1 */
-        movq    mm4, [ecx]    ; /* mm1 = ReconPtr2 */
-        movq    mm2, mm0    ; /* dup to prepare for up conversion */
-        movq    mm3, mm1    ; /* dup to prepare for up conversion */
-        movq    mm5, mm4    ; /* dup to prepare for up conversion */
-          ; /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    ; /* mm0 = INT16(FiltPtr) */
-        punpcklbw   mm1, mm7    ; /* mm1 = INT16(ReconPtr1) */
-        punpcklbw   mm4, mm7    ; /* mm1 = INT16(ReconPtr2) */
-        punpckhbw   mm2, mm7    ; /* mm2 = INT16(FiltPtr) */
-        punpckhbw   mm3, mm7    ; /* mm3 = INT16(ReconPtr1) */
-        punpckhbw   mm5, mm7    ; /* mm3 = INT16(ReconPtr2) */
-          ; /* average ReconPtr1 and ReconPtr2 */
-        paddw   mm1, mm4    ; /* mm1 = ReconPtr1 + ReconPtr2 */
-        paddw   mm3, mm5    ; /* mm3 = ReconPtr1 + ReconPtr2 */
-        psrlw   mm1, 1    ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
-        psrlw   mm3, 1    ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
-        psubw   mm0, mm1    ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
-        psubw   mm2, mm3    ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
-        movq    [edx], mm0    ; /* write answer out */
-        movq    [8 + edx], mm2    ; /* write answer out */
-          ; /* Increment pointers */
-        add   edx, 16   ;
-        add   eax, PixelsPerLine    ;
-        add   ebx, ReconPixelsPerLine   ;
-        add   ecx, ReconPixelsPerLine   ;
-
-
-        /*  ITERATION 3 */
-        movq    mm0, [eax]    ; /* mm0 = FiltPtr */
-        movq    mm1, [ebx]    ; /* mm1 = ReconPtr1 */
-        movq    mm4, [ecx]    ; /* mm1 = ReconPtr2 */
-        movq    mm2, mm0    ; /* dup to prepare for up conversion */
-        movq    mm3, mm1    ; /* dup to prepare for up conversion */
-        movq    mm5, mm4    ; /* dup to prepare for up conversion */
-          ; /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    ; /* mm0 = INT16(FiltPtr) */
-        punpcklbw   mm1, mm7    ; /* mm1 = INT16(ReconPtr1) */
-        punpcklbw   mm4, mm7    ; /* mm1 = INT16(ReconPtr2) */
-        punpckhbw   mm2, mm7    ; /* mm2 = INT16(FiltPtr) */
-        punpckhbw   mm3, mm7    ; /* mm3 = INT16(ReconPtr1) */
-        punpckhbw   mm5, mm7    ; /* mm3 = INT16(ReconPtr2) */
-          ; /* average ReconPtr1 and ReconPtr2 */
-        paddw   mm1, mm4    ; /* mm1 = ReconPtr1 + ReconPtr2 */
-        paddw   mm3, mm5    ; /* mm3 = ReconPtr1 + ReconPtr2 */
-        psrlw   mm1, 1    ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
-        psrlw   mm3, 1    ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
-        psubw   mm0, mm1    ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
-        psubw   mm2, mm3    ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
-        movq    [edx], mm0    ; /* write answer out */
-        movq    [8 + edx], mm2    ; /* write answer out */
-          ; /* Increment pointers */
-        add   edx, 16   ;
-        add   eax, PixelsPerLine    ;
-        add   ebx, ReconPixelsPerLine   ;
-        add   ecx, ReconPixelsPerLine   ;
-
-
-        /*  ITERATION 4 */
-        movq    mm0, [eax]    ; /* mm0 = FiltPtr */
-        movq    mm1, [ebx]    ; /* mm1 = ReconPtr1 */
-        movq    mm4, [ecx]    ; /* mm1 = ReconPtr2 */
-        movq    mm2, mm0    ; /* dup to prepare for up conversion */
-        movq    mm3, mm1    ; /* dup to prepare for up conversion */
-        movq    mm5, mm4    ; /* dup to prepare for up conversion */
-          ; /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    ; /* mm0 = INT16(FiltPtr) */
-        punpcklbw   mm1, mm7    ; /* mm1 = INT16(ReconPtr1) */
-        punpcklbw   mm4, mm7    ; /* mm1 = INT16(ReconPtr2) */
-        punpckhbw   mm2, mm7    ; /* mm2 = INT16(FiltPtr) */
-        punpckhbw   mm3, mm7    ; /* mm3 = INT16(ReconPtr1) */
-        punpckhbw   mm5, mm7    ; /* mm3 = INT16(ReconPtr2) */
-          ; /* average ReconPtr1 and ReconPtr2 */
-        paddw   mm1, mm4    ; /* mm1 = ReconPtr1 + ReconPtr2 */
-        paddw   mm3, mm5    ; /* mm3 = ReconPtr1 + ReconPtr2 */
-        psrlw   mm1, 1    ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
-        psrlw   mm3, 1    ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
-        psubw   mm0, mm1    ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
-        psubw   mm2, mm3    ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
-        movq    [edx], mm0    ; /* write answer out */
-        movq    [8 + edx], mm2    ; /* write answer out */
-          ; /* Increment pointers */
-        add   edx, 16   ;
-        add   eax, PixelsPerLine    ;
-        add   ebx, ReconPixelsPerLine   ;
-        add   ecx, ReconPixelsPerLine   ;
-
-
-        /*  ITERATION 5 */
-        movq    mm0, [eax]    ; /* mm0 = FiltPtr */
-        movq    mm1, [ebx]    ; /* mm1 = ReconPtr1 */
-        movq    mm4, [ecx]    ; /* mm1 = ReconPtr2 */
-        movq    mm2, mm0    ; /* dup to prepare for up conversion */
-        movq    mm3, mm1    ; /* dup to prepare for up conversion */
-        movq    mm5, mm4    ; /* dup to prepare for up conversion */
-          ; /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    ; /* mm0 = INT16(FiltPtr) */
-        punpcklbw   mm1, mm7    ; /* mm1 = INT16(ReconPtr1) */
-        punpcklbw   mm4, mm7    ; /* mm1 = INT16(ReconPtr2) */
-        punpckhbw   mm2, mm7    ; /* mm2 = INT16(FiltPtr) */
-        punpckhbw   mm3, mm7    ; /* mm3 = INT16(ReconPtr1) */
-        punpckhbw   mm5, mm7    ; /* mm3 = INT16(ReconPtr2) */
-          ; /* average ReconPtr1 and ReconPtr2 */
-        paddw   mm1, mm4    ; /* mm1 = ReconPtr1 + ReconPtr2 */
-        paddw   mm3, mm5    ; /* mm3 = ReconPtr1 + ReconPtr2 */
-        psrlw   mm1, 1    ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
-        psrlw   mm3, 1    ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
-        psubw   mm0, mm1    ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
-        psubw   mm2, mm3    ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
-        movq    [edx], mm0    ; /* write answer out */
-        movq    [8 + edx], mm2    ; /* write answer out */
-          ; /* Increment pointers */
-        add   edx, 16   ;
-        add   eax, PixelsPerLine    ;
-        add   ebx, ReconPixelsPerLine   ;
-        add   ecx, ReconPixelsPerLine   ;
-
-
-        /*  ITERATION 6 */
-        movq    mm0, [eax]    ; /* mm0 = FiltPtr */
-        movq    mm1, [ebx]    ; /* mm1 = ReconPtr1 */
-        movq    mm4, [ecx]    ; /* mm1 = ReconPtr2 */
-        movq    mm2, mm0    ; /* dup to prepare for up conversion */
-        movq    mm3, mm1    ; /* dup to prepare for up conversion */
-        movq    mm5, mm4    ; /* dup to prepare for up conversion */
-          ; /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    ; /* mm0 = INT16(FiltPtr) */
-        punpcklbw   mm1, mm7    ; /* mm1 = INT16(ReconPtr1) */
-        punpcklbw   mm4, mm7    ; /* mm1 = INT16(ReconPtr2) */
-        punpckhbw   mm2, mm7    ; /* mm2 = INT16(FiltPtr) */
-        punpckhbw   mm3, mm7    ; /* mm3 = INT16(ReconPtr1) */
-        punpckhbw   mm5, mm7    ; /* mm3 = INT16(ReconPtr2) */
-          ; /* average ReconPtr1 and ReconPtr2 */
-        paddw   mm1, mm4    ; /* mm1 = ReconPtr1 + ReconPtr2 */
-        paddw   mm3, mm5    ; /* mm3 = ReconPtr1 + ReconPtr2 */
-        psrlw   mm1, 1    ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
-        psrlw   mm3, 1    ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
-        psubw   mm0, mm1    ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
-        psubw   mm2, mm3    ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
-        movq    [edx], mm0    ; /* write answer out */
-        movq    [8 + edx], mm2    ; /* write answer out */
-          ; /* Increment pointers */
-        add   edx, 16   ;
-        add   eax, PixelsPerLine    ;
-        add   ebx, ReconPixelsPerLine   ;
-        add   ecx, ReconPixelsPerLine   ;
-
-
-        /*  ITERATION 7 */
-        movq    mm0, [eax]    ; /* mm0 = FiltPtr */
-        movq    mm1, [ebx]    ; /* mm1 = ReconPtr1 */
-        movq    mm4, [ecx]    ; /* mm1 = ReconPtr2 */
-        movq    mm2, mm0    ; /* dup to prepare for up conversion */
-        movq    mm3, mm1    ; /* dup to prepare for up conversion */
-        movq    mm5, mm4    ; /* dup to prepare for up conversion */
-          ; /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    ; /* mm0 = INT16(FiltPtr) */
-        punpcklbw   mm1, mm7    ; /* mm1 = INT16(ReconPtr1) */
-        punpcklbw   mm4, mm7    ; /* mm1 = INT16(ReconPtr2) */
-        punpckhbw   mm2, mm7    ; /* mm2 = INT16(FiltPtr) */
-        punpckhbw   mm3, mm7    ; /* mm3 = INT16(ReconPtr1) */
-        punpckhbw   mm5, mm7    ; /* mm3 = INT16(ReconPtr2) */
-          ; /* average ReconPtr1 and ReconPtr2 */
-        paddw   mm1, mm4    ; /* mm1 = ReconPtr1 + ReconPtr2 */
-        paddw   mm3, mm5    ; /* mm3 = ReconPtr1 + ReconPtr2 */
-        psrlw   mm1, 1    ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
-        psrlw   mm3, 1    ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
-        psubw   mm0, mm1    ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
-        psubw   mm2, mm3    ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
-        movq    [edx], mm0    ; /* write answer out */
-        movq    [8 + edx], mm2    ; /* write answer out */
-          ; /* Increment pointers */
-        add   edx, 16   ;
-        add   eax, PixelsPerLine    ;
-        add   ebx, ReconPixelsPerLine   ;
-        add   ecx, ReconPixelsPerLine   ;
-
-
-        /*  ITERATION 8 */
-        movq    mm0, [eax]    ; /* mm0 = FiltPtr */
-        movq    mm1, [ebx]    ; /* mm1 = ReconPtr1 */
-        movq    mm4, [ecx]    ; /* mm1 = ReconPtr2 */
-        movq    mm2, mm0    ; /* dup to prepare for up conversion */
-        movq    mm3, mm1    ; /* dup to prepare for up conversion */
-        movq    mm5, mm4    ; /* dup to prepare for up conversion */
-          ; /* convert from UINT8 to INT16 */
-        punpcklbw   mm0, mm7    ; /* mm0 = INT16(FiltPtr) */
-        punpcklbw   mm1, mm7    ; /* mm1 = INT16(ReconPtr1) */
-        punpcklbw   mm4, mm7    ; /* mm1 = INT16(ReconPtr2) */
-        punpckhbw   mm2, mm7    ; /* mm2 = INT16(FiltPtr) */
-        punpckhbw   mm3, mm7    ; /* mm3 = INT16(ReconPtr1) */
-        punpckhbw   mm5, mm7    ; /* mm3 = INT16(ReconPtr2) */
-          ; /* average ReconPtr1 and ReconPtr2 */
-        paddw   mm1, mm4    ; /* mm1 = ReconPtr1 + ReconPtr2 */
-        paddw   mm3, mm5    ; /* mm3 = ReconPtr1 + ReconPtr2 */
-        psrlw   mm1, 1    ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
-        psrlw   mm3, 1    ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
-        psubw   mm0, mm1    ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
-        psubw   mm2, mm3    ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
-        movq    [edx], mm0    ; /* write answer out */
-        movq    [8 + edx], mm2    ; /* write answer out */
-          ; /* Increment pointers */
-        add   edx, 16   ;
-        add   eax, PixelsPerLine    ;
-        add   ebx, ReconPixelsPerLine   ;
-        add   ecx, ReconPixelsPerLine   ;
-
-    };
-
-
-
-
-
-#endif
-}
-
-static ogg_uint32_t row_sad8__mmx (unsigned char *Src1, unsigned char *Src2)
-{
-  /* Returns max(SAD of bytes 0..3, SAD of bytes 4..7) for one 8-byte row of Src1 vs Src2. */
-#if 0
-  ogg_uint32_t SadValue;
-  ogg_uint32_t SadValue1;
-
-  SadValue    = DSP_OP_ABS_DIFF (Src1[0], Src2[0]) +
-          DSP_OP_ABS_DIFF (Src1[1], Src2[1]) +
-          DSP_OP_ABS_DIFF (Src1[2], Src2[2]) +
-          DSP_OP_ABS_DIFF (Src1[3], Src2[3]);
-
-  SadValue1   = DSP_OP_ABS_DIFF (Src1[4], Src2[4]) +
-          DSP_OP_ABS_DIFF (Src1[5], Src2[5]) +
-          DSP_OP_ABS_DIFF (Src1[6], Src2[6]) +
-          DSP_OP_ABS_DIFF (Src1[7], Src2[7]);
-
-  SadValue = ( SadValue > SadValue1 ) ? SadValue : SadValue1;
-
-  return SadValue;
-
-#else
-  ogg_uint32_t MaxSad;
-
-
-  __asm {
-    align       16
-    mov         ebx, Src1
-    mov         ecx, Src2
-
-
-    pxor    mm6, mm6    ; /* zero out mm6 for unpack */
-    pxor    mm7, mm7    ; /* zero out mm7 for unpack */
-    movq    mm0, [ebx]    ; /* take 8 bytes from Src1 (A) */
-    movq    mm1, [ecx]    ; /* take 8 bytes from Src2 (B) */
-
-    movq    mm2, mm0    ; /* keep a copy of A */
-    psubusb   mm0, mm1    ; /* A - B, saturating at 0 */
-    psubusb   mm1, mm2    ; /* B - A, saturating at 0 */
-    por   mm0, mm1    ; /* one of the two is 0 per byte, so OR gives |A - B| */
-
-    movq    mm1, mm0    ; /* duplicate so each half can be unpacked separately */
-
-    punpcklbw   mm0, mm6    ; /* ; unpack low four bytes to higher precision */
-    punpckhbw   mm1, mm7    ; /* ; unpack high four bytes to higher precision */
-
-    movq    mm2, mm0    ;
-    movq    mm3, mm1    ;
-    psrlq   mm2, 32   ; /* fold and add: upper two words onto lower two */
-    psrlq   mm3, 32   ;
-    paddw   mm0, mm2    ;
-    paddw   mm1, mm3    ;
-    movq    mm2, mm0    ;
-    movq    mm3, mm1    ;
-    psrlq   mm2, 16   ; /* second fold: word 1 onto word 0 */
-    psrlq   mm3, 16   ;
-    paddw   mm0, mm2    ; /* low word of mm0 = SAD of bytes 0..3 */
-    paddw   mm1, mm3    ; /* low word of mm1 = SAD of bytes 4..7 */
-
-    psubusw   mm1, mm0    ;
-    paddw   mm1, mm0    ; /* mm1 = max(mm1, mm0) */
-    movd    eax, mm1    ;
-
-    and         eax, 0xffff /* keep only the low word, which holds the result */
-    mov         MaxSad, eax
-  };
-   return MaxSad;
-
-
-
-
-
-#endif
-}
-
-
-
-
-static ogg_uint32_t col_sad8x8__mmx (unsigned char *Src1, unsigned char *Src2,
-                        ogg_uint32_t stride)
-{
-  /* Returns the largest per-column SAD of an 8x8 block, with each column summed separately over rows 0-3 and rows 4-7. */
-#if 0
-  ogg_uint32_t SadValue[8] = {0,0,0,0,0,0,0,0};
-  ogg_uint32_t SadValue2[8] = {0,0,0,0,0,0,0,0};
-  ogg_uint32_t MaxSad = 0;
-  ogg_uint32_t i;
-
-  for ( i = 0; i < 4; i++ ){
-    SadValue[0] += abs(Src1[0] - Src2[0]);
-    SadValue[1] += abs(Src1[1] - Src2[1]);
-    SadValue[2] += abs(Src1[2] - Src2[2]);
-    SadValue[3] += abs(Src1[3] - Src2[3]);
-    SadValue[4] += abs(Src1[4] - Src2[4]);
-    SadValue[5] += abs(Src1[5] - Src2[5]);
-    SadValue[6] += abs(Src1[6] - Src2[6]);
-    SadValue[7] += abs(Src1[7] - Src2[7]);
-
-    Src1 += stride;
-    Src2 += stride;
-  }
-
-  for ( i = 0; i < 4; i++ ){
-    SadValue2[0] += abs(Src1[0] - Src2[0]);
-    SadValue2[1] += abs(Src1[1] - Src2[1]);
-    SadValue2[2] += abs(Src1[2] - Src2[2]);
-    SadValue2[3] += abs(Src1[3] - Src2[3]);
-    SadValue2[4] += abs(Src1[4] - Src2[4]);
-    SadValue2[5] += abs(Src1[5] - Src2[5]);
-    SadValue2[6] += abs(Src1[6] - Src2[6]);
-    SadValue2[7] += abs(Src1[7] - Src2[7]);
-
-    Src1 += stride;
-    Src2 += stride;
-  }
-
-  for ( i = 0; i < 8; i++ ){
-    if ( SadValue[i] > MaxSad )
-      MaxSad = SadValue[i];
-    if ( SadValue2[i] > MaxSad )
-      MaxSad = SadValue2[i];
-  }
-
-  return MaxSad;
-#else
-  ogg_uint32_t MaxSad;
-
-
-    __asm {
-        align       16
-        mov         ebx, Src1
-        mov         ecx, Src2
-
-        pxor    mm3, mm3    ; /* zero out mm3 for unpack */
-        pxor    mm4, mm4    ; /* mm4 = sums for columns 0-3, rows 0-3 */
-        pxor    mm5, mm5    ; /* mm5 = sums for columns 4-7, rows 0-3 */
-        pxor    mm6, mm6    ; /* mm6 = sums for columns 0-3, rows 4-7 */
-        pxor    mm7, mm7    ; /* mm7 = sums for columns 4-7, rows 4-7 */
-        mov   edi, 4    ; /* 4 rows */
-        label_1:        ;
-        movq    mm0, [ebx]    ; /* take 8 bytes */
-        movq    mm1, [ecx]    ; /* take 8 bytes */
-
-        movq    mm2, mm0    ;
-        psubusb   mm0, mm1    ; /* A - B */
-        psubusb   mm1, mm2    ; /* B - A */
-        por   mm0, mm1    ; /* and or gives abs difference */
-        movq    mm1, mm0    ;
-
-        punpcklbw   mm0, mm3    ; /* unpack to higher precision for accumulation */
-        paddw   mm4, mm0    ; /* accumulate difference... */
-        punpckhbw   mm1, mm3    ; /* unpack high four bytes to higher precision */
-        paddw   mm5, mm1    ; /* accumulate difference... */
-        add   ebx, stride   ; /* Inc pointer into the new data */
-        add   ecx, stride   ; /* Inc pointer into the new data */
-
-        dec   edi   ;
-        jnz   label_1   ;
-
-        mov   edi, 4    ; /* 4 rows */
-        label_2:        ;
-        movq    mm0, [ebx]    ; /* take 8 bytes */
-        movq    mm1, [ecx]    ; /* take 8 bytes */
-
-        movq    mm2, mm0    ;
-        psubusb   mm0, mm1    ; /* A - B */
-        psubusb   mm1, mm2    ; /* B - A */
-        por   mm0, mm1    ; /* and or gives abs difference */
-        movq    mm1, mm0    ;
-
-        punpcklbw   mm0, mm3    ; /* unpack to higher precision for accumulation */
-        paddw   mm6, mm0    ; /* accumulate difference... */
-        punpckhbw   mm1, mm3    ; /* unpack high four bytes to higher precision */
-        paddw   mm7, mm1    ; /* accumulate difference... */
-        add   ebx, stride   ; /* Inc pointer into the new data */
-        add   ecx, stride   ; /* Inc pointer into the new data */
-
-        dec   edi   ;
-        jnz   label_2   ;
-
-        psubusw   mm7, mm6    ;
-        paddw   mm7, mm6    ; /* mm7 = max(mm7, mm6) */
-        psubusw   mm5, mm4    ;
-        paddw   mm5, mm4    ; /* mm5 = max(mm5, mm4) */
-        psubusw   mm7, mm5    ;
-        paddw   mm7, mm5    ; /* mm7 = max(mm5, mm7) */
-        movq    mm6, mm7    ;
-        psrlq   mm6, 32   ;
-        psubusw   mm7, mm6    ;
-        paddw   mm7, mm6    ; /* mm7 = max(mm7, mm7 >> 32): upper two words folded onto lower two */
-        movq    mm6, mm7    ;
-        psrlq   mm6, 16   ;
-        psubusw   mm7, mm6    ;
-        paddw   mm7, mm6    ; /* low word of mm7 = max over all four words */
-        movd    eax, mm7    ;
-        and       eax, 0xffff   ; /* keep only the low word, which holds the result */
-
-        mov         MaxSad, eax
-    };
-
-    return MaxSad;
-
-
-#endif
-}
-
-static ogg_uint32_t sad8x8__mmx (unsigned char *ptr1, ogg_uint32_t stride1,
-                unsigned char *ptr2, ogg_uint32_t stride2)
-{
-  /* Returns the sum of absolute differences over 8 rows of 8 bytes; ptr1 steps by stride1 and ptr2 by stride2 per row. */
-#if 0
-  ogg_uint32_t  i;
-  ogg_uint32_t  sad = 0;
-
-  for (i=8; i; i--) {
-    sad += DSP_OP_ABS_DIFF(ptr1[0], ptr2[0]);
-    sad += DSP_OP_ABS_DIFF(ptr1[1], ptr2[1]);
-    sad += DSP_OP_ABS_DIFF(ptr1[2], ptr2[2]);
-    sad += DSP_OP_ABS_DIFF(ptr1[3], ptr2[3]);
-    sad += DSP_OP_ABS_DIFF(ptr1[4], ptr2[4]);
-    sad += DSP_OP_ABS_DIFF(ptr1[5], ptr2[5]);
-    sad += DSP_OP_ABS_DIFF(ptr1[6], ptr2[6]);
-    sad += DSP_OP_ABS_DIFF(ptr1[7], ptr2[7]);
-
-    /* Step to next row of block. */
-    ptr1 += stride1;
-    ptr2 += stride2;
-  }
-
-  return sad;
-#else
-  ogg_uint32_t  DiffVal;
-
-  __asm {
-    align  16
-
-    mov         ebx, ptr1
-    mov         edx, ptr2
-
-    pxor    mm6, mm6    ; /* zero out mm6 for unpack */
-    pxor    mm7, mm7    ; /* mm7 contains the result */
-
-    ; /* ITERATION 1 */
-    movq    mm0, [ebx]    ; /* take 8 bytes */
-    movq    mm1, [edx]    ;
-    movq    mm2, mm0    ;
-
-    psubusb   mm0, mm1    ; /* A - B */
-    psubusb   mm1, mm2    ; /* B - A */
-    por   mm0, mm1    ; /* and or gives abs difference */
-    movq    mm1, mm0    ;
-
-    punpcklbw   mm0, mm6    ; /* unpack to higher precision for accumulation */
-    paddw   mm7, mm0    ; /* accumulate difference... */
-    punpckhbw   mm1, mm6    ; /* unpack high four bytes to higher precision */
-    add   ebx, stride1    ; /* Inc pointer into the new data */
-    paddw   mm7, mm1    ; /* accumulate difference... */
-    add   edx, stride2    ; /* Inc pointer into ref data */
-
-    ; /* ITERATION 2 */
-    movq    mm0, [ebx]    ; /* take 8 bytes */
-    movq    mm1, [edx]    ;
-    movq    mm2, mm0    ;
-
-    psubusb   mm0, mm1    ; /* A - B */
-    psubusb   mm1, mm2    ; /* B - A */
-    por   mm0, mm1    ; /* and or gives abs difference */
-    movq    mm1, mm0    ;
-
-    punpcklbw   mm0, mm6    ; /* unpack to higher precision for accumulation */
-    paddw   mm7, mm0    ; /* accumulate difference... */
-    punpckhbw   mm1, mm6    ; /* unpack high four bytes to higher precision */
-    add   ebx, stride1    ; /* Inc pointer into the new data */
-    paddw   mm7, mm1    ; /* accumulate difference... */
-    add   edx, stride2    ; /* Inc pointer into ref data */
-
-
-    ; /* ITERATION 3 */
-    movq    mm0, [ebx]    ; /* take 8 bytes */
-    movq    mm1, [edx]    ;
-    movq    mm2, mm0    ;
-
-    psubusb   mm0, mm1    ; /* A - B */
-    psubusb   mm1, mm2    ; /* B - A */
-    por   mm0, mm1    ; /* and or gives abs difference */
-    movq    mm1, mm0    ;
-
-    punpcklbw   mm0, mm6    ; /* unpack to higher precision for accumulation */
-    paddw   mm7, mm0    ; /* accumulate difference... */
-    punpckhbw   mm1, mm6    ; /* unpack high four bytes to higher precision */
-    add   ebx, stride1    ; /* Inc pointer into the new data */
-    paddw   mm7, mm1    ; /* accumulate difference... */
-    add   edx, stride2    ; /* Inc pointer into ref data */
-
-    ; /* ITERATION 4 */
-    movq    mm0, [ebx]    ; /* take 8 bytes */
-    movq    mm1, [edx]    ;
-    movq    mm2, mm0    ;
-
-    psubusb   mm0, mm1    ; /* A - B */
-    psubusb   mm1, mm2    ; /* B - A */
-    por   mm0, mm1    ; /* and or gives abs difference */
-    movq    mm1, mm0    ;
-
-    punpcklbw   mm0, mm6    ; /* unpack to higher precision for accumulation */
-    paddw   mm7, mm0    ; /* accumulate difference... */
-    punpckhbw   mm1, mm6    ; /* unpack high four bytes to higher precision */
-    add   ebx, stride1    ; /* Inc pointer into the new data */
-    paddw   mm7, mm1    ; /* accumulate difference... */
-    add   edx, stride2    ; /* Inc pointer into ref data */
-
-
-    ; /* ITERATION 5 */
-    movq    mm0, [ebx]    ; /* take 8 bytes */
-    movq    mm1, [edx]    ;
-    movq    mm2, mm0    ;
-
-    psubusb   mm0, mm1    ; /* A - B */
-    psubusb   mm1, mm2    ; /* B - A */
-    por   mm0, mm1    ; /* and or gives abs difference */
-    movq    mm1, mm0    ;
-
-    punpcklbw   mm0, mm6    ; /* unpack to higher precision for accumulation */
-    paddw   mm7, mm0    ; /* accumulate difference... */
-    punpckhbw   mm1, mm6    ; /* unpack high four bytes to higher precision */
-    add   ebx, stride1    ; /* Inc pointer into the new data */
-    paddw   mm7, mm1    ; /* accumulate difference... */
-    add   edx, stride2    ; /* Inc pointer into ref data */
-
-
-    ; /* ITERATION 6 */
-    movq    mm0, [ebx]    ; /* take 8 bytes */
-    movq    mm1, [edx]    ;
-    movq    mm2, mm0    ;
-
-    psubusb   mm0, mm1    ; /* A - B */
-    psubusb   mm1, mm2    ; /* B - A */
-    por   mm0, mm1    ; /* and or gives abs difference */
-    movq    mm1, mm0    ;
-
-    punpcklbw   mm0, mm6    ; /* unpack to higher precision for accumulation */
-    paddw   mm7, mm0    ; /* accumulate difference... */
-    punpckhbw   mm1, mm6    ; /* unpack high four bytes to higher precision */
-    add   ebx, stride1    ; /* Inc pointer into the new data */
-    paddw   mm7, mm1    ; /* accumulate difference... */
-    add   edx, stride2    ; /* Inc pointer into ref data */
-
-
-    ; /* ITERATION 7 */
-    movq    mm0, [ebx]    ; /* take 8 bytes */
-    movq    mm1, [edx]    ;
-    movq    mm2, mm0    ;
-
-    psubusb   mm0, mm1    ; /* A - B */
-    psubusb   mm1, mm2    ; /* B - A */
-    por   mm0, mm1    ; /* and or gives abs difference */
-    movq    mm1, mm0    ;
-
-    punpcklbw   mm0, mm6    ; /* unpack to higher precision for accumulation */
-    paddw   mm7, mm0    ; /* accumulate difference... */
-    punpckhbw   mm1, mm6    ; /* unpack high four bytes to higher precision */
-    add   ebx, stride1    ; /* Inc pointer into the new data */
-    paddw   mm7, mm1    ; /* accumulate difference... */
-    add   edx, stride2    ; /* Inc pointer into ref data */
-
-
-
-    ; /* ITERATION 8 */
-    movq    mm0, [ebx]    ; /* take 8 bytes */
-    movq    mm1, [edx]    ;
-    movq    mm2, mm0    ;
-
-    psubusb   mm0, mm1    ; /* A - B */
-    psubusb   mm1, mm2    ; /* B - A */
-    por   mm0, mm1    ; /* and or gives abs difference */
-    movq    mm1, mm0    ;
-
-    punpcklbw   mm0, mm6    ; /* unpack to higher precision for accumulation */
-    paddw   mm7, mm0    ; /* accumulate difference... */
-    punpckhbw   mm1, mm6    ; /* unpack high four bytes to higher precision */
-    add   ebx, stride1    ; /* Inc pointer into the new data */
-    paddw   mm7, mm1    ; /* accumulate difference... */
-    add   edx, stride2    ; /* Inc pointer into ref data */
-
-
-
-    ; /* Horizontal add: fold the four word sums in mm7 into its low word */
-
-    movq    mm0, mm7    ;
-    psrlq   mm7, 32   ; /* bring the upper two words down */
-    paddw   mm7, mm0    ; /* words 0,1 now hold w0+w2 and w1+w3 */
-    movq    mm0, mm7    ;
-    psrlq   mm7, 16   ; /* bring word 1 down to word 0 */
-    paddw   mm7, mm0    ; /* low word = w0+w1+w2+w3 */
-    movd    eax, mm7    ;
-    and       eax, 0xffff   ; /* keep only the low word, which holds the total */
-
-    mov         DiffVal, eax
-  };
-
-  return DiffVal;
-
-
-
-#endif
-}
-
-static ogg_uint32_t sad8x8_thres__mmx (unsigned char *ptr1, ogg_uint32_t stride1,
-                unsigned char *ptr2, ogg_uint32_t stride2,
-            ogg_uint32_t thres)
-{ /* 8x8 SAD; the active (#else) path ignores thres and always returns the full sum from sad8x8__mmx. */
-#if 0
-  ogg_uint32_t  i;
-  ogg_uint32_t  sad = 0;
-
-  for (i=8; i; i--) {
-    sad += DSP_OP_ABS_DIFF(ptr1[0], ptr2[0]);
-    sad += DSP_OP_ABS_DIFF(ptr1[1], ptr2[1]);
-    sad += DSP_OP_ABS_DIFF(ptr1[2], ptr2[2]);
-    sad += DSP_OP_ABS_DIFF(ptr1[3], ptr2[3]);
-    sad += DSP_OP_ABS_DIFF(ptr1[4], ptr2[4]);
-    sad += DSP_OP_ABS_DIFF(ptr1[5], ptr2[5]);
-    sad += DSP_OP_ABS_DIFF(ptr1[6], ptr2[6]);
-    sad += DSP_OP_ABS_DIFF(ptr1[7], ptr2[7]);
-
-    if (sad > thres ) /* disabled reference stops early once the running sum exceeds thres */
-      break;
-
-    /* Step to next row of block. */
-    ptr1 += stride1;
-    ptr2 += stride2;
-  }
-
-  return sad;
-#else
-  return sad8x8__mmx (ptr1, stride1, ptr2, stride2); /* no early exit: thres is unused here */
-#endif
-}
-
-
-static ogg_uint32_t sad8x8_xy2_thres__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
-                          unsigned char *RefDataPtr1,
-                    unsigned char *RefDataPtr2, ogg_uint32_t RefStride,
-                    ogg_uint32_t thres)
-{ /* 8x8 SAD of SrcData against the per-byte average of RefDataPtr1 and RefDataPtr2; the MMX path never reads thres. */
-#if 0
-  ogg_uint32_t  i;
-  ogg_uint32_t  sad = 0;
-
-  for (i=8; i; i--) {
-    sad += DSP_OP_ABS_DIFF(SrcData[0], DSP_OP_AVG (RefDataPtr1[0], RefDataPtr2[0]));
-    sad += DSP_OP_ABS_DIFF(SrcData[1], DSP_OP_AVG (RefDataPtr1[1], RefDataPtr2[1]));
-    sad += DSP_OP_ABS_DIFF(SrcData[2], DSP_OP_AVG (RefDataPtr1[2], RefDataPtr2[2]));
-    sad += DSP_OP_ABS_DIFF(SrcData[3], DSP_OP_AVG (RefDataPtr1[3], RefDataPtr2[3]));
-    sad += DSP_OP_ABS_DIFF(SrcData[4], DSP_OP_AVG (RefDataPtr1[4], RefDataPtr2[4]));
-    sad += DSP_OP_ABS_DIFF(SrcData[5], DSP_OP_AVG (RefDataPtr1[5], RefDataPtr2[5]));
-    sad += DSP_OP_ABS_DIFF(SrcData[6], DSP_OP_AVG (RefDataPtr1[6], RefDataPtr2[6]));
-    sad += DSP_OP_ABS_DIFF(SrcData[7], DSP_OP_AVG (RefDataPtr1[7], RefDataPtr2[7]));
-
-    if ( sad > thres )
-      break;
-
-    /* Step to next row of block. */
-    SrcData += SrcStride;
-    RefDataPtr1 += RefStride;
-    RefDataPtr2 += RefStride;
-  }
-
-  return sad;
-#else
-  ogg_uint32_t  DiffVal;
-
-  __asm {
-    align 16
-
-        mov     ebx, SrcData
-        mov     ecx, RefDataPtr1
-        mov     edx, RefDataPtr2
-
-
-    pcmpeqd   mm5, mm5    ; /* all ones in mm5 */
-    paddb   mm5, mm5    ; /* 0xff + 0xff wraps to 0xfe in every byte: fefefefefefefefe in mm5 */
-            ;
-    pxor    mm6, mm6    ; /* zero out mm6 for unpack */
-    pxor    mm7, mm7    ; /* mm7 contains the result */
-    mov   edi, 8    ; /* 8 rows */
-    loop_start:       ;
-    movq    mm0, [ebx]    ; /* take 8 bytes */
-
-    movq    mm2, [ecx]    ;
-    movq    mm3, [edx]    ; /* take average of mm2 and mm3 */
-    movq    mm1, mm2    ;
-    pand    mm1, mm3    ; /* a & b */
-    pxor    mm3, mm2    ; /* a ^ b */
-    pand    mm3, mm5    ; /* clear each byte's low bit so the 64-bit shift cannot leak into the next byte */
-    psrlq   mm3, 1    ; /* (a ^ b) >> 1 per byte */
-    paddb   mm1, mm3    ; /* average = (a & b) + ((a ^ b) >> 1), rounded down */
-
-    movq    mm2, mm0    ;
-
-    psubusb   mm0, mm1    ; /* A - B */
-    psubusb   mm1, mm2    ; /* B - A */
-    por   mm0, mm1    ; /* and or gives abs difference */
-    movq    mm1, mm0    ;
-
-    punpcklbw   mm0, mm6    ; /* unpack to higher precision for accumulation */
-    paddw   mm7, mm0    ; /* accumulate difference... */
-    punpckhbw   mm1, mm6    ; /* unpack high four bytes to higher precision */
-    add   ebx, SrcStride    ; /* Inc pointer into the new data */
-    paddw   mm7, mm1    ; /* accumulate difference... */
-    add   ecx, RefStride    ; /* Inc pointer into ref data */
-    add   edx, RefStride    ; /* Inc pointer into ref data */
-
-    dec   edi   ;
-    jnz   loop_start    ;
-
-    movq    mm0, mm7    ; /* horizontal add of the four word sums in mm7 */
-    psrlq   mm7, 32   ;
-    paddw   mm7, mm0    ;
-    movq    mm0, mm7    ;
-    psrlq   mm7, 16   ;
-    paddw   mm7, mm0    ; /* low word = total */
-    movd    eax, mm7    ;
-    and   eax, 0xffff   ; /* keep only the low word */
-
-    mov DiffVal, eax
-  };
-
-  return DiffVal;
-
-
-
-#endif
-}
-
-static ogg_uint32_t intra8x8_err__mmx (unsigned char *DataPtr, ogg_uint32_t Stride)
-{ /* Returns 64*sum(x*x) - sum(x)^2 over the 64 pixels of the 8x8 block at DataPtr (rows Stride bytes apart). */
-#if 0
-  ogg_uint32_t  i;
-  ogg_uint32_t  XSum=0;
-  ogg_uint32_t  XXSum=0;
-
-  for (i=8; i; i--) {
-     /* Accumulate the sum and sum of squares of all eight pixels in the row. */
-     XSum += DataPtr[0];
-     XXSum += DataPtr[0]*DataPtr[0];
-     XSum += DataPtr[1];
-     XXSum += DataPtr[1]*DataPtr[1];
-     XSum += DataPtr[2];
-     XXSum += DataPtr[2]*DataPtr[2];
-     XSum += DataPtr[3];
-     XXSum += DataPtr[3]*DataPtr[3];
-     XSum += DataPtr[4];
-     XXSum += DataPtr[4]*DataPtr[4];
-     XSum += DataPtr[5];
-     XXSum += DataPtr[5]*DataPtr[5];
-     XSum += DataPtr[6];
-     XXSum += DataPtr[6]*DataPtr[6];
-     XSum += DataPtr[7];
-     XXSum += DataPtr[7]*DataPtr[7];
-
-     /* Step to next row of block. */
-     DataPtr += Stride;
-   }
-
-   /* Compute population variance as mis-match metric. */
-   return (( (XXSum<<6) - XSum*XSum ) );
-#else
-  ogg_uint32_t  XSum;
-  ogg_uint32_t  XXSum;
-
-  __asm {
-    align 16
-
-        mov     ecx, DataPtr
-
-    pxor    mm5, mm5    ; /* mm5 = four word-sized partial sums of x */
-    pxor    mm6, mm6    ; /* zero for unpack */
-    pxor    mm7, mm7    ; /* mm7 = two dword-sized partial sums of x*x */
-    mov   edi, 8    ; /* 8 rows */
-    loop_start:
-    movq    mm0, [ecx]    ; /* take 8 bytes */
-    movq    mm2, mm0    ;
-
-    punpcklbw   mm0, mm6    ; /* low four bytes as words */
-    punpckhbw   mm2, mm6    ; /* high four bytes as words */
-
-    paddw   mm5, mm0    ;
-    paddw   mm5, mm2    ;
-
-    pmaddwd   mm0, mm0    ; /* square each word and add adjacent pairs into dwords */
-    pmaddwd   mm2, mm2    ;
-            ;
-    paddd   mm7, mm0    ;
-    paddd   mm7, mm2    ;
-
-    add   ecx, Stride   ; /* Inc pointer into src data */
-
-    dec   edi   ;
-    jnz   loop_start    ;
-
-    movq    mm0, mm5    ; /* horizontal add of the four word sums */
-    psrlq   mm5, 32   ;
-    paddw   mm5, mm0    ;
-    movq    mm0, mm5    ;
-    psrlq   mm5, 16   ;
-    paddw   mm5, mm0    ;
-    movd    edi, mm5    ;
-    movsx   edi, di   ; /* keep the low word (the total), sign-extended to 32 bits */
-    mov   eax, edi    ;
-
-    movq    mm0, mm7    ; /* add the two dword sums of squares */
-    psrlq   mm7, 32   ;
-    paddd   mm7, mm0    ;
-    movd    ebx, mm7    ;
-
-        mov         XSum, eax
-        mov         XXSum, ebx;
-
-  };
-    /* Mis-match metric: 64*XXSum - XSum^2, i.e. the population variance scaled by 64*64. */
-    return (( (XXSum<<6) - XSum*XSum ) );
-
-
-
-#endif
-}
-
-static ogg_uint32_t inter8x8_err__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
-                     unsigned char *RefDataPtr, ogg_uint32_t RefStride)
-{
-  /* Returns 64*sum(d*d) - sum(d)^2 over an 8x8 block, where d is the per-pixel difference SrcData - RefDataPtr. */
-#if 0
-  ogg_uint32_t  i;
-  ogg_uint32_t  XSum=0;
-  ogg_uint32_t  XXSum=0;
-  ogg_int32_t   DiffVal;
-
-  for (i=8; i; i--) {
-    DiffVal = DSP_OP_DIFF (SrcData[0], RefDataPtr[0]);
-    XSum += DiffVal;
-    XXSum += DiffVal*DiffVal;
-
-    DiffVal = DSP_OP_DIFF (SrcData[1], RefDataPtr[1]);
-    XSum += DiffVal;
-    XXSum += DiffVal*DiffVal;
-
-    DiffVal = DSP_OP_DIFF (SrcData[2], RefDataPtr[2]);
-    XSum += DiffVal;
-    XXSum += DiffVal*DiffVal;
-
-    DiffVal = DSP_OP_DIFF (SrcData[3], RefDataPtr[3]);
-    XSum += DiffVal;
-    XXSum += DiffVal*DiffVal;
-
-    DiffVal = DSP_OP_DIFF (SrcData[4], RefDataPtr[4]);
-    XSum += DiffVal;
-    XXSum += DiffVal*DiffVal;
-
-    DiffVal = DSP_OP_DIFF (SrcData[5], RefDataPtr[5]);
-    XSum += DiffVal;
-    XXSum += DiffVal*DiffVal;
-
-    DiffVal = DSP_OP_DIFF (SrcData[6], RefDataPtr[6]);
-    XSum += DiffVal;
-    XXSum += DiffVal*DiffVal;
-
-    DiffVal = DSP_OP_DIFF (SrcData[7], RefDataPtr[7]);
-    XSum += DiffVal;
-    XXSum += DiffVal*DiffVal;
-
-    /* Step to next row of block. */
-    SrcData += SrcStride;
-    RefDataPtr += RefStride;
-  }
-
-  /* Compute and return population variance as mis-match metric. */
-  return (( (XXSum<<6) - XSum*XSum ));
-#else
-  ogg_uint32_t  XSum;
-  ogg_uint32_t  XXSum;
-
-
-  __asm {
-    align 16
-
-        mov     ecx, SrcData
-        mov     edx, RefDataPtr
-
-    pxor    mm5, mm5    ; /* mm5 = four word-sized partial sums of d */
-    pxor    mm6, mm6    ; /* zero for unpack */
-    pxor    mm7, mm7    ; /* mm7 = two dword-sized partial sums of d*d */
-    mov   edi, 8    ; /* 8 rows */
-    loop_start:       ;
-    movq    mm0, [ecx]    ; /* take 8 bytes */
-    movq    mm1, [edx]    ;
-    movq    mm2, mm0    ;
-    movq    mm3, mm1    ;
-
-    punpcklbw   mm0, mm6    ; /* widen bytes to words so the difference can be signed */
-    punpcklbw   mm1, mm6    ;
-    punpckhbw   mm2, mm6    ;
-    punpckhbw   mm3, mm6    ;
-
-    psubsw    mm0, mm1    ; /* d = Src - Ref, low four pixels */
-    psubsw    mm2, mm3    ; /* d = Src - Ref, high four pixels */
-
-    paddw   mm5, mm0    ;
-    paddw   mm5, mm2    ;
-
-    pmaddwd   mm0, mm0    ; /* square each word and add adjacent pairs into dwords */
-    pmaddwd   mm2, mm2    ;
-            ;
-    paddd   mm7, mm0    ;
-    paddd   mm7, mm2    ;
-
-    add   ecx, SrcStride    ; /* Inc pointer into src data */
-    add   edx, RefStride    ; /* Inc pointer into ref data */
-
-    dec   edi   ;
-    jnz   loop_start    ;
-
-    movq    mm0, mm5    ; /* horizontal add of the four word sums */
-    psrlq   mm5, 32   ;
-    paddw   mm5, mm0    ;
-    movq    mm0, mm5    ;
-    psrlq   mm5, 16   ;
-    paddw   mm5, mm0    ;
-    movd    edi, mm5    ;
-    movsx   edi, di   ; /* keep the low word (the total), sign-extended since it may be negative */
-    mov   eax, edi    ;
-
-    movq    mm0, mm7    ; /* add the two dword sums of squares */
-    psrlq   mm7, 32   ;
-    paddd   mm7, mm0    ;
-    movd    ebx, mm7    ;
-
-        mov     XSum, eax
-        mov     XXSum, ebx
-
-  };
-
-  /* Compute and return population variance as mis-match metric. */
-  return (( (XXSum<<6) - XSum*XSum ));
-
-
-#endif
-}
-
-static ogg_uint32_t inter8x8_err_xy2__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
-                         unsigned char *RefDataPtr1,
-             unsigned char *RefDataPtr2, ogg_uint32_t RefStride)
-{ /* Returns 64*sum(d*d) - sum(d)^2 over an 8x8 block, where d = SrcData minus the per-byte average of RefDataPtr1 and RefDataPtr2. */
-#if 0
-  ogg_uint32_t  i;
-  ogg_uint32_t  XSum=0;
-  ogg_uint32_t  XXSum=0;
-  ogg_int32_t   DiffVal;
-
-  for (i=8; i; i--) {
-    DiffVal = DSP_OP_DIFF(SrcData[0], DSP_OP_AVG (RefDataPtr1[0], RefDataPtr2[0]));
-    XSum += DiffVal;
-    XXSum += DiffVal*DiffVal;
-
-    DiffVal = DSP_OP_DIFF(SrcData[1], DSP_OP_AVG (RefDataPtr1[1], RefDataPtr2[1]));
-    XSum += DiffVal;
-    XXSum += DiffVal*DiffVal;
-
-    DiffVal = DSP_OP_DIFF(SrcData[2], DSP_OP_AVG (RefDataPtr1[2], RefDataPtr2[2]));
-    XSum += DiffVal;
-    XXSum += DiffVal*DiffVal;
-
-    DiffVal = DSP_OP_DIFF(SrcData[3], DSP_OP_AVG (RefDataPtr1[3], RefDataPtr2[3]));
-    XSum += DiffVal;
-    XXSum += DiffVal*DiffVal;
-
-    DiffVal = DSP_OP_DIFF(SrcData[4], DSP_OP_AVG (RefDataPtr1[4], RefDataPtr2[4]));
-    XSum += DiffVal;
-    XXSum += DiffVal*DiffVal;
-
-    DiffVal = DSP_OP_DIFF(SrcData[5], DSP_OP_AVG (RefDataPtr1[5], RefDataPtr2[5]));
-    XSum += DiffVal;
-    XXSum += DiffVal*DiffVal;
-
-    DiffVal = DSP_OP_DIFF(SrcData[6], DSP_OP_AVG (RefDataPtr1[6], RefDataPtr2[6]));
-    XSum += DiffVal;
-    XXSum += DiffVal*DiffVal;
-
-    DiffVal = DSP_OP_DIFF(SrcData[7], DSP_OP_AVG (RefDataPtr1[7], RefDataPtr2[7]));
-    XSum += DiffVal;
-    XXSum += DiffVal*DiffVal;
-
-    /* Step to next row of block. */
-    SrcData += SrcStride;
-    RefDataPtr1 += RefStride;
-    RefDataPtr2 += RefStride;
-  }
-
-  /* Compute and return population variance as mis-match metric. */
-  return (( (XXSum<<6) - XSum*XSum ));
-#else
-  ogg_uint32_t XSum;
-  ogg_uint32_t XXSum;
-
-  __asm {
-    align 16
-
-        mov ebx, SrcData
-        mov ecx, RefDataPtr1
-        mov edx, RefDataPtr2
-
-    pcmpeqd   mm4, mm4    ; /* all ones in mm4 */
-    paddb   mm4, mm4    ; /* 0xff + 0xff wraps to 0xfe in every byte: fefefefefefefefe in mm4 */
-    pxor    mm5, mm5    ; /* mm5 = four word-sized partial sums of d */
-    pxor    mm6, mm6    ; /* zero for unpack */
-    pxor    mm7, mm7    ; /* mm7 = two dword-sized partial sums of d*d */
-    mov   edi, 8    ; /* 8 rows */
-    loop_start:       ;
-    movq    mm0, [ebx]    ; /* take 8 bytes */
-
-    movq    mm2, [ecx]    ;
-    movq    mm3, [edx]    ; /* take average of mm2 and mm3 */
-    movq    mm1, mm2    ;
-    pand    mm1, mm3    ; /* a & b */
-    pxor    mm3, mm2    ; /* a ^ b */
-    pand    mm3, mm4    ; /* clear each byte's low bit so the 64-bit shift cannot leak into the next byte */
-    psrlq   mm3, 1    ; /* (a ^ b) >> 1 per byte */
-    paddb   mm1, mm3    ; /* average = (a & b) + ((a ^ b) >> 1), rounded down */
-
-    movq    mm2, mm0    ;
-    movq    mm3, mm1    ;
-
-    punpcklbw   mm0, mm6    ; /* widen bytes to words so the difference can be signed */
-    punpcklbw   mm1, mm6    ;
-    punpckhbw   mm2, mm6    ;
-    punpckhbw   mm3, mm6    ;
-
-    psubsw    mm0, mm1    ; /* d = Src - average, low four pixels */
-    psubsw    mm2, mm3    ; /* d = Src - average, high four pixels */
-
-    paddw   mm5, mm0    ;
-    paddw   mm5, mm2    ;
-
-    pmaddwd   mm0, mm0    ; /* square each word and add adjacent pairs into dwords */
-    pmaddwd   mm2, mm2    ;
-            ;
-    paddd   mm7, mm0    ;
-    paddd   mm7, mm2    ;
-
-    add   ebx, SrcStride    ; /* Inc pointer into src data */
-    add   ecx, RefStride    ; /* Inc pointer into ref data */
-    add   edx, RefStride    ; /* Inc pointer into ref data */
-
-    dec   edi   ;
-    jnz   loop_start    ;
-
-    movq    mm0, mm5    ; /* horizontal add of the four word sums */
-    psrlq   mm5, 32   ;
-    paddw   mm5, mm0    ;
-    movq    mm0, mm5    ;
-    psrlq   mm5, 16   ;
-    paddw   mm5, mm0    ;
-    movd    edi, mm5    ;
-    movsx   edi, di   ; /* keep the low word (the total), sign-extended since it may be negative */
-    mov         XSum, edi   ; /* movl   eax, edi    ; Modified for vc to reuse eax*/
-
-    movq    mm0, mm7    ; /* add the two dword sums of squares */
-    psrlq   mm7, 32   ;
-    paddd   mm7, mm0    ;
-    movd        XXSum, mm7 ; /*movd   eax, mm7    ; Modified for vc to reuse eax */
-  };
-  /* Mis-match metric: 64*XXSum - XSum^2, i.e. the population variance scaled by 64*64. */
-    return (( (XXSum<<6) - XSum*XSum ));
-
-#endif
-}
-
-static void restore_fpu (void)
-{
-  /* Issues EMMS, which marks the MMX registers empty so x87 floating-point code can be used again. */
-    __asm {
-        emms
-    }
-
-}
-
-void dsp_mmx_init(DspFunctions *funcs)
-{ /* Points every listed entry of *funcs at the corresponding MMX routine; funcs must be non-NULL (it is dereferenced unchecked). */
-  funcs->restore_fpu = restore_fpu;
-  funcs->sub8x8 = sub8x8__mmx;
-  funcs->sub8x8_128 = sub8x8_128__mmx;
-  funcs->sub8x8avg2 = sub8x8avg2__mmx;
-  funcs->row_sad8 = row_sad8__mmx;
-  funcs->col_sad8x8 = col_sad8x8__mmx;
-  funcs->sad8x8 = sad8x8__mmx;
-  funcs->sad8x8_thres = sad8x8_thres__mmx; /* note: this implementation ignores its thres argument */
-  funcs->sad8x8_xy2_thres = sad8x8_xy2_thres__mmx; /* note: this implementation ignores its thres argument */
-  funcs->intra8x8_err = intra8x8_err__mmx;
-  funcs->inter8x8_err = inter8x8_err__mmx;
-  funcs->inter8x8_err_xy2 = inter8x8_err_xy2__mmx;
-}
-

+ 0 - 333
Engine/lib/libtheora/lib/enc/x86_32_vs/fdct_mmx.c

@@ -1,333 +0,0 @@
-;//==========================================================================
-;//
-;//  THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
-;//  KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
-;//  IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
-;//  PURPOSE.
-;//
-;//  Copyright (c) 1999 - 2001  On2 Technologies Inc. All Rights Reserved.
-;//
-;//--------------------------------------------------------------------------
-
-#include "theora/theora.h"
-#include "../codec_internal.h"
-#include "../dsp.h"
-
-/* xCkS(8-k) = cos(k*pi/16) * 65536 rounded to 16 bits (k = 1..7), repeated in all four words for use with pmulhw. */
-static const  ogg_int64_t xC1S7 = 0x0fb15fb15fb15fb15;
-static const  ogg_int64_t xC2S6 = 0x0ec83ec83ec83ec83;
-static const  ogg_int64_t xC3S5 = 0x0d4dbd4dbd4dbd4db;
-static const  ogg_int64_t xC4S4 = 0x0b505b505b505b505;
-static const  ogg_int64_t xC5S3 = 0x08e3a8e3a8e3a8e3a;
-static const  ogg_int64_t xC6S2 = 0x061f861f861f861f8;
-static const  ogg_int64_t xC7S1 = 0x031f131f131f131f1;
-
-/* Transposes two 4x4 blocks of 16-bit words whose rows are 16 bytes apart: InputData1 into OutputData1 and InputData2 into OutputData2. */
-static __inline void Transpose_mmx( ogg_int16_t *InputData1, ogg_int16_t *OutputData1,
-                                 ogg_int16_t *InputData2, ogg_int16_t *OutputData2)
-{
-    /* Rows 0 and 1 of OutputData1 hold rows a and b temporarily while the InputData2 block is transposed first. */
-    __asm {
-        align 16
-            mov     eax, InputData1
-            mov     ebx, InputData2
-            mov     ecx, OutputData1
-            mov     edx, OutputData2
-
-
-        movq    mm0, [eax]    ; /* mm0 = a0 a1 a2 a3 */
-        movq    mm4, [ebx]    ; /* mm4 = e4 e5 e6 e7 */
-        movq    mm1, [16 + eax]   ; /* mm1 = b0 b1 b2 b3 */
-        movq    mm5, [16 + ebx]   ; /* mm5 = f4 f5 f6 f7 */
-        movq    mm2, [32 + eax]   ; /* mm2 = c0 c1 c2 c3 */
-        movq    mm6, [32 + ebx]   ; /* mm6 = g4 g5 g6 g7 */
-        movq    mm3, [48 + eax]   ; /* mm3 = d0 d1 d2 d3 */
-        movq    [16 + ecx], mm1   ; /* save  b0 b1 b2 b3 */
-        movq    mm7, [48 + ebx]   ; /* mm7 = h4 h5 h6 h7 */
-          ; /* Transpose 2x8 block */
-        movq    mm1, mm4    ; /* mm1 = e3 e2 e1 e0 */
-        punpcklwd   mm4, mm5    ; /* mm4 = f1 e1 f0 e0 */
-        movq    [ecx], mm0    ; /* save a3 a2 a1 a0  */
-        punpckhwd   mm1, mm5    ; /* mm1 = f3 e3 f2 e2 */
-        movq    mm0, mm6    ; /* mm0 = g3 g2 g1 g0 */
-        punpcklwd   mm6, mm7    ; /* mm6 = h1 g1 h0 g0 */
-        movq    mm5, mm4    ; /* mm5 = f1 e1 f0 e0 */
-        punpckldq   mm4, mm6    ; /* mm4 = h0 g0 f0 e0 = MM4 */
-        punpckhdq   mm5, mm6    ; /* mm5 = h1 g1 f1 e1 = MM5 */
-        movq    mm6, mm1    ; /* mm6 = f3 e3 f2 e2 */
-        movq    [edx], mm4    ;
-        punpckhwd   mm0, mm7    ; /* mm0 = h3 g3 h2 g2 */
-        movq    [16 + edx], mm5   ;
-        punpckhdq   mm6, mm0    ; /* mm6 = h3 g3 f3 e3 = MM7 */
-        movq    mm4, [ecx]    ; /* mm4 = a3 a2 a1 a0 */
-        punpckldq   mm1, mm0    ; /* mm1 = h2 g2 f2 e2 = MM6 */
-        movq    mm5, [16 + ecx]   ; /* mm5 = b3 b2 b1 b0 */
-        movq    mm0, mm4    ; /* mm0 = a3 a2 a1 a0 */
-        movq    [48 + edx], mm6   ;
-        punpcklwd   mm0, mm5    ; /* mm0 = b1 a1 b0 a0 */
-        movq    [32 + edx], mm1   ;
-        punpckhwd   mm4, mm5    ; /* mm4 = b3 a3 b2 a2 */
-        movq    mm5, mm2    ; /* mm5 = c3 c2 c1 c0 */
-        punpcklwd   mm2, mm3    ; /* mm2 = d1 c1 d0 c0 */
-        movq    mm1, mm0    ; /* mm1 = b1 a1 b0 a0 */
-        punpckldq   mm0, mm2    ; /* mm0 = d0 c0 b0 a0 = MM0 */
-        punpckhdq   mm1, mm2    ; /* mm1 = d1 c1 b1 a1 = MM1 */
-        movq    mm2, mm4    ; /* mm2 = b3 a3 b2 a2 */
-        movq    [ecx], mm0    ;
-        punpckhwd   mm5, mm3    ; /* mm5 = d3 c3 d2 c2 */
-        movq    [16 + ecx], mm1   ;
-        punpckhdq   mm4, mm5    ; /* mm4 = d3 c3 b3 a3 = MM3 */
-        punpckldq   mm2, mm5    ; /* mm2 = d2 c2 b2 a2 = MM2 */
-        movq    [48 + ecx], mm4   ;
-        movq    [32 + ecx], mm2   ;
-
-    };
-
-
-}
-/* One forward-DCT pass on eight rows of four words: ip0..ip3 at InputData1 and ip4..ip7 at InputData2, rows 16 bytes apart; results overwrite the inputs. */
-static __inline void Fdct_mmx( ogg_int16_t *InputData1, ogg_int16_t *InputData2, ogg_int16_t *temp)
-{
-    /* temp receives one 8-byte spill (is07 - is34) that is reloaded later as irot_input_y. */
-    __asm {
-        align 16
-
-
-                mov     eax, InputData1
-                mov     ebx, InputData2
-                mov     ecx, temp
-        movq    mm0, [eax]    ;
-        movq    mm1, [16 + eax]   ;
-        movq    mm2, [48 + eax]   ;
-        movq    mm3, [16 + ebx]   ;
-        movq    mm4, mm0    ;
-        movq    mm5, mm1    ;
-        movq    mm6, mm2    ;
-        movq    mm7, mm3    ;
-                ;
-        paddsw    mm0, [48 + ebx]   ; /* mm0 = ip0 + ip7 = is07 */
-        paddsw    mm1, [32 + eax]   ; /* mm1 = ip1 + ip2 = is12 */
-        paddsw    mm2, [ebx]    ; /* mm2 = ip3 + ip4 = is34 */
-        paddsw    mm3, [32 + ebx]   ; /* mm3 = ip5 + ip6 = is56 */
-        psubsw    mm4, [48 + ebx]   ; /* mm4 = ip0 - ip7 = id07 */
-        psubsw    mm5, [32 + eax]   ; /* mm5 = ip1 - ip2 = id12 */
-                ;
-        psubsw    mm0, mm2    ; /* mm0 = is07 - is34 */
-                ;
-        paddsw    mm2, mm2    ;
-                ;
-        psubsw    mm6, [ebx]    ; /* mm6 = ip3 - ip4 = id34 */
-                ;
-        paddsw    mm2, mm0    ; /* mm2 = is07 + is34 = is0734 */
-        psubsw    mm1, mm3    ; /* mm1 = is12 - is56 */
-        movq    [ecx], mm0    ; /* Save is07 - is34 to free mm0; */
-        paddsw    mm3, mm3    ;
-        paddsw    mm3, mm1    ; /* mm3 = is12 + is56  = is1256 */
-                ;
-        psubsw    mm7, [32 + ebx]   ; /* mm7 = ip5 - ip6 = id56 */
-          ; /* ------------------------------------------------------------------- */
-        psubsw    mm5, mm7    ; /* mm5 = id12 - id56 */
-        paddsw    mm7, mm7    ;
-        paddsw    mm7, mm5    ; /* mm7 = id12 + id56 */
-          ; /* ------------------------------------------------------------------- */
-        psubsw    mm2, mm3    ; /* mm2 = is0734 - is1256 */
-        paddsw    mm3, mm3    ;
-                ;
-        movq    mm0, mm2    ; /* make a copy */
-        paddsw    mm3, mm2    ; /* mm3 = is0734 + is1256 */
-                ;
-        pmulhw    mm0, xC4S4    ; /* mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */
-        paddw   mm0, mm2    ; /* mm0 = xC4S4 * ( is0734 - is1256 ) */
-        psrlw   mm2, 15   ;
-        paddw   mm0, mm2    ; /* Truncate mm0, now it is op[4] */
-                ;
-        movq    mm2, mm3    ;
-        movq    [ebx], mm0    ; /* save ip4, now mm0,mm2 are free */
-                ;
-        movq    mm0, mm3    ;
-        pmulhw    mm3, xC4S4    ; /* mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */
-                ;
-        psrlw   mm2, 15   ;
-        paddw   mm3, mm0    ; /* mm3 = xC4S4 * ( is0734 +is1256 )  */
-        paddw   mm3, mm2    ; /* Truncate mm3, now it is op[0] */
-                ;
-        movq    [eax], mm3    ;
-          ; /* ------------------------------------------------------------------- */
-        movq    mm3, [ecx]    ; /* mm3 = irot_input_y */
-        pmulhw    mm3, xC2S6  ; /* mm3 = xC2S6 * irot_input_y - irot_input_y */
-                ;
-        movq    mm2, [ecx]    ;
-        movq    mm0, mm2    ;
-                ;
-        psrlw   mm2, 15   ; /* mm2 = sign bit of each irot_input_y word */
-        paddw   mm3, mm0    ; /* mm3 = xC2S6 * irot_input_y */
-                ;
-        paddw   mm3, mm2    ; /* Truncated */
-        movq    mm0, mm5    ;
-                ;
-        movq    mm2, mm5    ;
-        pmulhw    mm0, xC6S2    ; /* mm0 = xC6S2 * irot_input_x */
-                ;
-        psrlw   mm2, 15   ;
-        paddw   mm0, mm2    ; /* Truncated */
-                ;
-        paddsw    mm3, mm0    ; /* ip[2] */
-        movq    [32 + eax], mm3   ; /* Save ip2 */
-                ;
-        movq    mm0, mm5    ;
-        movq    mm2, mm5    ;
-                ;
-        pmulhw    mm5, xC2S6    ; /* mm5 = xC2S6 * irot_input_x - irot_input_x */
-        psrlw   mm2, 15   ;
-                ;
-        movq    mm3, [ecx]    ;
-        paddw   mm5, mm0    ; /* mm5 = xC2S6 * irot_input_x */
-                ;
-        paddw   mm5, mm2    ; /* Truncated */
-        movq    mm2, mm3    ;
-                ;
-        pmulhw    mm3, xC6S2    ; /* mm3 = xC6S2 * irot_input_y */
-        psrlw   mm2, 15   ;
-                ;
-        paddw   mm3, mm2    ; /* Truncated */
-        psubsw    mm3, mm5    ;
-                ;
-        movq    [32 + ebx], mm3   ;
-          ; /* ------------------------------------------------------------------- */
-        movq    mm0, xC4S4    ;
-        movq    mm2, mm1    ;
-        movq    mm3, mm1    ;
-                ;
-        pmulhw    mm1, mm0    ; /* mm1 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */
-        psrlw   mm2, 15   ;
-                ;
-        paddw   mm1, mm3    ; /* mm1 = xC4S4 * ( is12 - is56 ) */
-        paddw   mm1, mm2    ; /* Truncate mm1, now it is icommon_product1 */
-                ;
-        movq    mm2, mm7    ;
-        movq    mm3, mm7    ;
-                ;
-        pmulhw    mm7, mm0    ; /* mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */
-        psrlw   mm2, 15   ;
-                ;
-        paddw   mm7, mm3    ; /* mm7 = xC4S4 * ( id12 + id56 ) */
-        paddw   mm7, mm2    ; /* Truncate mm7, now it is icommon_product2 */
-          ; /* ------------------------------------------------------------------- */
-        pxor    mm0, mm0    ; /* Clear mm0 */
-        psubsw    mm0, mm6    ; /* mm0 = - id34 */
-                ;
-        psubsw    mm0, mm7    ; /* mm0 = - ( id34 + idcommon_product2 ) */
-        paddsw    mm6, mm6    ;
-        paddsw    mm6, mm0    ; /* mm6 = id34 - icommon_product2 */
-                ;
-        psubsw    mm4, mm1    ; /* mm4 = id07 - icommon_product1 */
-        paddsw    mm1, mm1    ;
-        paddsw    mm1, mm4    ; /* mm1 = id07 + icommon_product1 */
-          ; /* ------------------------------------------------------------------- */
-        movq    mm7, xC1S7    ;
-        movq    mm2, mm1    ;
-                ;
-        movq    mm3, mm1    ;
-        pmulhw    mm1, mm7    ; /* mm1 = xC1S7 * irot_input_x - irot_input_x */
-                ;
-        movq    mm7, xC7S1    ;
-        psrlw   mm2, 15   ;
-                ;
-        paddw   mm1, mm3    ; /* mm1 = xC1S7 * irot_input_x */
-        paddw   mm1, mm2    ; /* Truncated */
-                ;
-        pmulhw    mm3, mm7    ; /* mm3 = xC7S1 * irot_input_x */
-        paddw   mm3, mm2    ; /* Truncated */
-                ;
-        movq    mm5, mm0    ;
-        movq    mm2, mm0    ;
-                ;
-        movq    mm7, xC1S7    ;
-        pmulhw    mm0, mm7    ; /* mm0 = xC1S7 * irot_input_y - irot_input_y */
-                ;
-        movq    mm7, xC7S1    ;
-        psrlw   mm2, 15   ;
-                ;
-        paddw   mm0, mm5    ; /* mm0 = xC1S7 * irot_input_y */
-        paddw   mm0, mm2    ; /* Truncated */
-                ;
-        pmulhw    mm5, mm7    ; /* mm5 = xC7S1 * irot_input_y */
-        paddw   mm5, mm2    ; /* Truncated */
-                ;
-        psubsw    mm1, mm5    ; /* mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = ip1 */
-        paddsw    mm3, mm0    ; /* mm3 = xC7S1 * irot_input_x + xC1S7 * irot_input_y = ip7 */
-                ;
-        movq    [16 + eax], mm1   ;
-        movq    [48 + ebx], mm3   ;
-          ; /* ------------------------------------------------------------------- */
-        movq    mm0, xC3S5    ;
-        movq    mm1, xC5S3    ;
-                ;
-        movq    mm5, mm6    ;
-        movq    mm7, mm6    ;
-                ;
-        movq    mm2, mm4    ;
-        movq    mm3, mm4    ;
-                ;
-        pmulhw    mm4, mm0    ; /* mm4 = xC3S5 * irot_input_x - irot_input_x */
-        pmulhw    mm6, mm1    ; /* mm6 = xC5S3 * irot_input_y - irot_input_y */
-                ;
-        psrlw   mm2, 15   ;
-        psrlw   mm5, 15   ;
-                ;
-        paddw   mm4, mm3    ; /* mm4 = xC3S5 * irot_input_x */
-        paddw   mm6, mm7    ; /* mm6 = xC5S3 * irot_input_y */
-                ;
-        paddw   mm4, mm2    ; /* Truncated */
-        paddw   mm6, mm5    ; /* Truncated */
-                ;
-        psubsw    mm4, mm6    ; /* ip3 */
-        movq    [48 + eax], mm4   ;
-                ;
-        movq    mm4, mm3    ;
-        movq    mm6, mm7    ;
-                ;
-        pmulhw    mm3, mm1    ; /* mm3 = xC5S3 * irot_input_x - irot_input_x */
-        pmulhw    mm7, mm0    ; /* mm7 = xC3S5 * irot_input_y - irot_input_y */
-                ;
-        paddw   mm4, mm2    ;
-        paddw   mm6, mm5    ;
-                ;
-        paddw   mm3, mm4    ; /* mm3 = xC5S3 * irot_input_x */
-        paddw   mm7, mm6    ; /* mm7 = xC3S5 * irot_input_y */
-                ;
-        paddw   mm3, mm7    ; /* ip5 */
-        movq    [16 + ebx], mm3   ;
-
-};
-
-}
-
-
-/* Forward DCT of the 8x8 block of 16-bit values at InputData, written to
-   OutputData.
-   The block is handled as four 4x4 quadrants: the first two Transpose/Fdct
-   pairs process the top and bottom halves of InputData into OutputData, the
-   last two process the left and right halves of OutputData in place.
-   MMX state is cleared with EMMS before returning. */
-static void fdct_short__mmx ( ogg_int16_t *InputData, ogg_int16_t *OutputData)
-{
-  /* Scratch for Fdct_mmx, which spills one 8-byte register into it.
-     Automatic rather than static storage, so concurrent calls do not share
-     (and corrupt) the same buffer. */
-  ogg_int16_t tmp[32];
-  /* Round tmp up to the next 16-byte boundary (0..15 bytes of padding).
-     Unsigned arithmetic with explicit parentheses: the padding is
-     (16 - address) & 15, not 16 - (address & 15). */
-  ogg_int16_t* align_tmp = (ogg_int16_t*)((unsigned char*)tmp + ((16u - (unsigned)tmp) & 15u));
-
-  /* Top half: quadrants at +0 and +4. */
-  Transpose_mmx(InputData, OutputData, InputData + 4, OutputData + 4);
-  Fdct_mmx(OutputData, OutputData + 4, align_tmp);
-
-  /* Bottom half: quadrants at +32 and +36. */
-  Transpose_mmx(InputData + 32, OutputData + 32, InputData + 36, OutputData + 36);
-  Fdct_mmx(OutputData+32, OutputData + 36, align_tmp);
-
-  /* Left half, in place: quadrants at +0 and +32. */
-  Transpose_mmx(OutputData, OutputData, OutputData + 32, OutputData + 32);
-  Fdct_mmx(OutputData, OutputData + 32, align_tmp);
-
-  /* Right half, in place: quadrants at +4 and +36. */
-  Transpose_mmx(OutputData + 4, OutputData + 4, OutputData + 36, OutputData + 36);
-  Fdct_mmx(OutputData + 4, OutputData + 36, align_tmp);
-
-  __asm     emms
-
-}
-
-/* Selects the MMX forward DCT of this file as funcs->fdct_short. */
-void dsp_mmx_fdct_init(DspFunctions *funcs) {
-  funcs->fdct_short = fdct_short__mmx;
-}

+ 0 - 197
Engine/lib/libtheora/lib/enc/x86_32_vs/recon_mmx.c

@@ -1,197 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: reconstruct.c,v 1.6 2003/12/03 08:59:41 arc Exp $
-
- ********************************************************************/
-
-#include "../codec_internal.h"
-
-/* 0x80 in every byte; recon_intra8x8__mmx XORs it into signed-saturated bytes, which adds 128 to each. */
-static const unsigned __int64 V128 = 0x8080808080808080;
-/* Copies 8 rows of 8 bytes from src to dest; both buffers advance by stride bytes per row. */
-static void copy8x8__mmx (unsigned char *src,
-                    unsigned char *dest,
-                    unsigned int stride)
-{
-    /* Rows are moved four at a time through mm0-mm3; no EMMS is issued here. */
-    //Is this even the fastest way to do this?
-    __asm {
-        align 16
-
-        mov         eax, src
-        mov         ebx, dest
-        mov         ecx, stride
-        /* edi = 3 * stride, the offset of the fourth row of each group */
-        lea         edi, [ecx + ecx * 2]
-        movq        mm0, [eax]
-        movq        mm1, [eax + ecx]
-        movq        mm2, [eax + ecx * 2]
-        movq        mm3, [eax + edi]
-        lea         eax, [eax + ecx * 4]
-        movq        [ebx], mm0
-        movq        [ebx + ecx], mm1
-        movq        [ebx + ecx * 2], mm2
-        movq        [ebx + edi], mm3
-        lea         ebx, [ebx + ecx * 4]
-        movq        mm0, [eax]
-        movq        mm1, [eax + ecx]
-        movq        mm2, [eax + ecx * 2]
-        movq        mm3, [eax + edi]
-        movq        [ebx], mm0
-        movq        [ebx + ecx], mm1
-        movq        [ebx + ecx * 2], mm2
-        movq        [ebx + edi], mm3
-
-    };
-
-}
-/* Writes an 8x8 block to ReconPtr: each of the 64 words at ChangePtr is saturated to a signed byte and then biased by 128. */
-static void recon_intra8x8__mmx (unsigned char *ReconPtr, ogg_int16_t *ChangePtr,
-              ogg_uint32_t LineStep)
-{
-    /* ChangePtr is read as 8 consecutive rows of 8 words; ReconPtr advances by LineStep bytes per row. */
-    __asm {
-        align 16
-
-        mov         eax, ReconPtr
-        mov         ebx, ChangePtr
-        mov         ecx, LineStep
-
-        movq        mm0, V128
-        /* edi = end of the 128-byte coefficient block */
-        lea         edi, [128 + ebx]
-    loop_start:
-        movq        mm2, [ebx]
-
-        packsswb    mm2, [8 + ebx]
-        por         mm0, mm0                /* leaves mm0 unchanged (x | x == x) */
-        pxor        mm2, mm0                /* flip the top bit of each byte: signed -> unsigned, +128 */
-        lea         ebx, [16 + ebx]
-        cmp         ebx, edi
-
-        movq        [eax], mm2
-
-
-
-        lea         eax, [eax + ecx]
-        jc          loop_start              /* flags still from cmp: loop while ebx < edi (unsigned) */
-
-
-    };
-
-}
-
-
-
-
-/* Writes an 8x8 block to ReconPtr: RefPtr pixels plus the ChangePtr residual words, clamped to 0..255. */
-static void recon_inter8x8__mmx (unsigned char *ReconPtr, unsigned char *RefPtr,
-              ogg_int16_t *ChangePtr, ogg_uint32_t LineStep)
-{
-    /* ReconPtr and RefPtr both advance by LineStep bytes per row; ChangePtr is read as 8 consecutive rows of 8 words. */
-    __asm {
-
-        align 16
-
-        mov         eax, ReconPtr
-        mov         ebx, ChangePtr
-        mov         ecx, LineStep
-        mov         edx, RefPtr
-
-        pxor        mm0, mm0
-        lea         edi, [128 + ebx]
-
-    loop_start:
-        movq        mm2, [edx]
-
-        movq        mm4, [ebx]
-        movq        mm3, mm2
-        movq        mm5, [8 + ebx]
-        punpcklbw   mm2, mm0                /* widen the low 4 reference pixels to words */
-        paddsw      mm2, mm4
-        punpckhbw   mm3, mm0                /* widen the high 4 reference pixels to words */
-        paddsw      mm3, mm5
-        add         edx, ecx
-        packuswb    mm2, mm3                /* pack back to bytes with unsigned saturation */
-        lea         ebx, [16 + ebx]
-        cmp         ebx, edi
-
-        movq        [eax], mm2
-
-        lea         eax, [eax + ecx]
-        jc          loop_start              /* flags still from cmp: loop while ebx < edi (unsigned) */
-
-    };
-}
-
-
-
-/* Writes an 8x8 block to ReconPtr: the truncating average (a+b)>>1 of RefPtr1 and RefPtr2 pixels plus the ChangePtr residual words, packed with unsigned saturation. */
-static void recon_inter8x8_half__mmx (unsigned char *ReconPtr, unsigned char *RefPtr1,
-                   unsigned char *RefPtr2, ogg_int16_t *ChangePtr,
-               ogg_uint32_t LineStep)
-{
-    __asm {
-        align 16
-
-        mov     eax, ReconPtr
-        mov     ebx, ChangePtr
-        mov     ecx, RefPtr1
-        mov     edx, RefPtr2
-
-        pxor        mm0, mm0
-        lea     edi, [128 + ebx]
-
-    loop_start:
-        movq        mm2, [ecx]
-        movq        mm4, [edx]
-
-        movq        mm3, mm2
-        punpcklbw       mm2, mm0
-        movq        mm5, mm4
-        movq        mm6, [ebx]
-        punpckhbw       mm3, mm0
-        movq        mm7, [8 + ebx]
-        punpcklbw       mm4, mm0
-        punpckhbw       mm5, mm0
-        paddw       mm2, mm4
-        paddw       mm3, mm5
-        psrlw       mm2, 1
-        psrlw       mm3, 1
-        paddw       mm2, mm6                /* residual is added with wrapping paddw, not saturating paddsw */
-        paddw       mm3, mm7
-        lea     ebx, [16 + ebx]
-        packuswb        mm2, mm3
-        add     ecx, LineStep
-        add     edx, LineStep
-        movq        [eax], mm2
-        add     eax, LineStep
-        cmp     ebx, edi
-        jc      loop_start                  /* loop while ebx < edi (unsigned), i.e. for 8 rows */
-
-    };
-
-}
-
-
-
-
-/* Points the block copy and reconstruction entries of *funcs at the MMX
-   implementations in this file. */
-void dsp_mmx_recon_init(DspFunctions *funcs)
-{
-  /* reconstruction from residuals */
-  funcs->recon_intra8x8 = recon_intra8x8__mmx;
-  funcs->recon_inter8x8 = recon_inter8x8__mmx;
-  funcs->recon_inter8x8_half = recon_inter8x8_half__mmx;
-
-  /* plain block copy */
-  funcs->copy8x8 = copy8x8__mmx;
-}
-

+ 0 - 409
Engine/lib/libtheora/lib/enc/x86_64/dct_decode_mmx.c

@@ -1,409 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008                *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: dct_decode_mmx.c 15400 2008-10-15 12:10:58Z tterribe $
-
- ********************************************************************/
-
-#include <stdlib.h>
-
-#include "../codec_internal.h"
-
-#if defined(USE_ASM)
-
-static const __attribute__((aligned(8),used)) ogg_int64_t OC_V3=
- 0x0003000300030003LL;
-static const __attribute__((aligned(8),used)) ogg_int64_t OC_V4=
- 0x0004000400040004LL;
-/*Filters 8 pixels across rows: with _pix on the row below the edge, reads rows -2...1 and rewrites rows -1 and 0; _ll holds the limit L as four words.*/
-static void loop_filter_v(unsigned char *_pix,int _ystride,
-                          const ogg_int16_t *_ll){
-  long esi;
-  _pix-=_ystride*2;
-  __asm__ __volatile__(
-    /*mm0=0*/
-    "pxor %%mm0,%%mm0\n\t"
-    /*esi=_ystride*3*/
-    "lea (%[ystride],%[ystride],2),%[s]\n\t"
-    /*mm7=_pix[0...8]*/
-    "movq (%[pix]),%%mm7\n\t"
-    /*mm4=_pix[0...8+_ystride*3]*/
-    "movq (%[pix],%[s]),%%mm4\n\t"
-    /*mm6=_pix[0...8]*/
-    "movq %%mm7,%%mm6\n\t"
-    /*Expand unsigned _pix[0...3] to 16 bits.*/
-    "punpcklbw %%mm0,%%mm6\n\t"
-    "movq %%mm4,%%mm5\n\t"
-    /*Expand unsigned _pix[4...8] to 16 bits.*/
-    "punpckhbw %%mm0,%%mm7\n\t"
-    /*Expand other arrays too.*/
-    "punpcklbw %%mm0,%%mm4\n\t"
-    "punpckhbw %%mm0,%%mm5\n\t"
-    /*mm7:mm6=_p[0...8]-_p[0...8+_ystride*3]:*/
-    "psubw %%mm4,%%mm6\n\t"
-    "psubw %%mm5,%%mm7\n\t"
-    /*mm5=mm4=_pix[0...8+_ystride]*/
-    "movq (%[pix],%[ystride]),%%mm4\n\t"
-    /*mm1=mm3=mm2=_pix[0...8+_ystride*2]*/
-    "movq (%[pix],%[ystride],2),%%mm2\n\t"
-    "movq %%mm4,%%mm5\n\t"
-    "movq %%mm2,%%mm3\n\t"
-    "movq %%mm2,%%mm1\n\t"
-    /*Expand these arrays.*/
-    "punpckhbw %%mm0,%%mm5\n\t"
-    "punpcklbw %%mm0,%%mm4\n\t"
-    "punpckhbw %%mm0,%%mm3\n\t"
-    "punpcklbw %%mm0,%%mm2\n\t"
-    /*Preload...*/
-    "movq %[OC_V3],%%mm0\n\t"
-    /*mm3:mm2=_pix[0...8+_ystride*2]-_pix[0...8+_ystride]*/
-    "psubw %%mm5,%%mm3\n\t"
-    "psubw %%mm4,%%mm2\n\t"
-    /*Scale by 3.*/
-    "pmullw %%mm0,%%mm3\n\t"
-    "pmullw %%mm0,%%mm2\n\t"
-    /*Preload...*/
-    "movq %[OC_V4],%%mm0\n\t"
-    /*f=mm3:mm2==_pix[0...8]-_pix[0...8+_ystride*3]+
-       3*(_pix[0...8+_ystride*2]-_pix[0...8+_ystride])*/
-    "paddw %%mm7,%%mm3\n\t"
-    "paddw %%mm6,%%mm2\n\t"
-    /*Add 4.*/
-    "paddw %%mm0,%%mm3\n\t"
-    "paddw %%mm0,%%mm2\n\t"
-    /*"Divide" by 8.*/
-    "psraw $3,%%mm3\n\t"
-    "psraw $3,%%mm2\n\t"
-    /*Now compute lflim of mm3:mm2 cf. Section 7.10 of the spec.*/
-    /*Free up mm5.*/
-    "packuswb %%mm5,%%mm4\n\t"
-    /*mm0=L L L L*/
-    "movq (%[ll]),%%mm0\n\t"
-    /*if(R_i<-2L||R_i>2L)R_i=0:*/
-    "movq %%mm2,%%mm5\n\t"
-    "pxor %%mm6,%%mm6\n\t"
-    "movq %%mm0,%%mm7\n\t"
-    "psubw %%mm0,%%mm6\n\t"
-    "psllw $1,%%mm7\n\t"
-    "psllw $1,%%mm6\n\t"
-    /*mm2==R_3 R_2 R_1 R_0*/
-    /*mm5==R_3 R_2 R_1 R_0*/
-    /*mm6==-2L -2L -2L -2L*/
-    /*mm7==2L 2L 2L 2L*/
-    "pcmpgtw %%mm2,%%mm7\n\t"
-    "pcmpgtw %%mm6,%%mm5\n\t"
-    "pand %%mm7,%%mm2\n\t"
-    "movq %%mm0,%%mm7\n\t"
-    "pand %%mm5,%%mm2\n\t"
-    "psllw $1,%%mm7\n\t"
-    "movq %%mm3,%%mm5\n\t"
-    /*mm3==R_7 R_6 R_5 R_4*/
-    /*mm5==R_7 R_6 R_5 R_4*/
-    /*mm6==-2L -2L -2L -2L*/
-    /*mm7==2L 2L 2L 2L*/
-    "pcmpgtw %%mm3,%%mm7\n\t"
-    "pcmpgtw %%mm6,%%mm5\n\t"
-    "pand %%mm7,%%mm3\n\t"
-    "movq %%mm0,%%mm7\n\t"
-    "pand %%mm5,%%mm3\n\t"
-    /*if(R_i<-L)R_i'=R_i+2L;
-      if(R_i>L)R_i'=R_i-2L;
-      if(R_i<-L||R_i>L)R_i=-R_i':*/
-    "psraw $1,%%mm6\n\t"
-    "movq %%mm2,%%mm5\n\t"
-    "psllw $1,%%mm7\n\t"
-    /*mm2==R_3 R_2 R_1 R_0*/
-    /*mm5==R_3 R_2 R_1 R_0*/
-    /*mm6==-L -L -L -L*/
-    /*mm0==L L L L*/
-    /*mm5=R_i>L?FF:00*/
-    "pcmpgtw %%mm0,%%mm5\n\t"
-    /*mm6=-L>R_i?FF:00*/
-    "pcmpgtw %%mm2,%%mm6\n\t"
-    /*mm7=R_i>L?2L:0*/
-    "pand %%mm5,%%mm7\n\t"
-    /*mm2=R_i>L?R_i-2L:R_i*/
-    "psubw %%mm7,%%mm2\n\t"
-    "movq %%mm0,%%mm7\n\t"
-    /*mm5=-L>R_i||R_i>L*/
-    "por %%mm6,%%mm5\n\t"
-    "psllw $1,%%mm7\n\t"
-    /*mm7=-L>R_i?2L:0*/
-    "pand %%mm6,%%mm7\n\t"
-    "pxor %%mm6,%%mm6\n\t"
-    /*mm2=-L>R_i?R_i+2L:R_i*/
-    "paddw %%mm7,%%mm2\n\t"
-    "psubw %%mm0,%%mm6\n\t"
-    /*mm5=-L>R_i||R_i>L?-R_i':0*/
-    "pand %%mm2,%%mm5\n\t"
-    "movq %%mm0,%%mm7\n\t"
-    /*mm2=-L>R_i||R_i>L?0:R_i*/
-    "psubw %%mm5,%%mm2\n\t"
-    "psllw $1,%%mm7\n\t"
-    /*mm2=-L>R_i||R_i>L?-R_i':R_i*/
-    "psubw %%mm5,%%mm2\n\t"
-    "movq %%mm3,%%mm5\n\t"
-    /*mm3==R_7 R_6 R_5 R_4*/
-    /*mm5==R_7 R_6 R_5 R_4*/
-    /*mm6==-L -L -L -L*/
-    /*mm0==L L L L*/
-    /*mm6=-L>R_i?FF:00*/
-    "pcmpgtw %%mm3,%%mm6\n\t"
-    /*mm5=R_i>L?FF:00*/
-    "pcmpgtw %%mm0,%%mm5\n\t"
-    /*mm7=R_i>L?2L:0*/
-    "pand %%mm5,%%mm7\n\t"
-    /*mm3=R_i>L?R_i-2L:R_i*/
-    "psubw %%mm7,%%mm3\n\t"
-    "psllw $1,%%mm0\n\t"
-    /*mm5=-L>R_i||R_i>L*/
-    "por %%mm6,%%mm5\n\t"
-    /*mm0=-L>R_i?2L:0*/
-    "pand %%mm6,%%mm0\n\t"
-    /*mm3=-L>R_i?R_i+2L:R_i*/
-    "paddw %%mm0,%%mm3\n\t"
-    /*mm5=-L>R_i||R_i>L?-R_i':0*/
-    "pand %%mm3,%%mm5\n\t"
-    /*mm3=-L>R_i||R_i>L?0:R_i*/
-    "psubw %%mm5,%%mm3\n\t"
-    /*mm3=-L>R_i||R_i>L?-R_i':R_i*/
-    "psubw %%mm5,%%mm3\n\t"
-    /*Unfortunately, there's no unsigned byte+signed byte with unsigned
-       saturation op code, so we have to promote things back 16 bits.*/
-    "pxor %%mm0,%%mm0\n\t"
-    "movq %%mm4,%%mm5\n\t"
-    "punpcklbw %%mm0,%%mm4\n\t"
-    "punpckhbw %%mm0,%%mm5\n\t"
-    "movq %%mm1,%%mm6\n\t"
-    "punpcklbw %%mm0,%%mm1\n\t"
-    "punpckhbw %%mm0,%%mm6\n\t"
-    /*_pix[0...8+_ystride]+=R_i*/
-    "paddw %%mm2,%%mm4\n\t"
-    "paddw %%mm3,%%mm5\n\t"
-    /*_pix[0...8+_ystride*2]-=R_i*/
-    "psubw %%mm2,%%mm1\n\t"
-    "psubw %%mm3,%%mm6\n\t"
-    "packuswb %%mm5,%%mm4\n\t"
-    "packuswb %%mm6,%%mm1\n\t"
-    /*Write it back out.*/
-    "movq %%mm4,(%[pix],%[ystride])\n\t"
-    "movq %%mm1,(%[pix],%[ystride],2)\n\t"
-    :[s]"=&S"(esi)
-    :[pix]"r"(_pix),[ystride]"r"((long)_ystride),[ll]"r"(_ll),
-     [OC_V3]"m"(OC_V3),[OC_V4]"m"(OC_V4)
-    :"memory"
-  );
-}
-
-/*This code implements the bulk of loop_filter_h() for four rows: it reads
-   _pix[0...3] of each row and rewrites _pix[1] and _pix[2].
-  Data are striped p0 p1 p2 p3 ... p0 p1 p2 p3 ..., so in order to load all
-   four p0's to one register we must transpose the values in four mmx regs.
-  When half is done we repeat this for the rest.*/
-static void loop_filter_h4(unsigned char *_pix,long _ystride,
-                           const ogg_int16_t *_ll){
-  long esi;
-  long edi;
-  __asm__ __volatile__(
-    /*x x x x 3 2 1 0*/
-    "movd (%[pix]),%%mm0\n\t"
-    /*esi=_ystride*3*/
-    "lea (%[ystride],%[ystride],2),%[s]\n\t"
-    /*x x x x 7 6 5 4*/
-    "movd (%[pix],%[ystride]),%%mm1\n\t"
-    /*x x x x B A 9 8*/
-    "movd (%[pix],%[ystride],2),%%mm2\n\t"
-    /*x x x x F E D C*/
-    "movd (%[pix],%[s]),%%mm3\n\t"
-    /*mm0=7 3 6 2 5 1 4 0*/
-    "punpcklbw %%mm1,%%mm0\n\t"
-    /*mm2=F B E A D 9 C 8*/
-    "punpcklbw %%mm3,%%mm2\n\t"
-    /*mm1=7 3 6 2 5 1 4 0*/
-    "movq %%mm0,%%mm1\n\t"
-    /*mm0=F B 7 3 E A 6 2*/
-    "punpckhwd %%mm2,%%mm0\n\t"
-    /*mm1=D 9 5 1 C 8 4 0*/
-    "punpcklwd %%mm2,%%mm1\n\t"
-    "pxor %%mm7,%%mm7\n\t"
-    /*mm5=D 9 5 1 C 8 4 0*/
-    "movq %%mm1,%%mm5\n\t"
-    /*mm1=x C x 8 x 4 x 0==pix[0]*/
-    "punpcklbw %%mm7,%%mm1\n\t"
-    /*mm5=x D x 9 x 5 x 1==pix[1]*/
-    "punpckhbw %%mm7,%%mm5\n\t"
-    /*mm3=F B 7 3 E A 6 2*/
-    "movq %%mm0,%%mm3\n\t"
-    /*mm0=x E x A x 6 x 2==pix[2]*/
-    "punpcklbw %%mm7,%%mm0\n\t"
-    /*mm3=x F x B x 7 x 3==pix[3]*/
-    "punpckhbw %%mm7,%%mm3\n\t"
-    /*mm1=mm1-mm3==pix[0]-pix[3]*/
-    "psubw %%mm3,%%mm1\n\t"
-    /*Save a copy of pix[2] for later.*/
-    "movq %%mm0,%%mm4\n\t"
-    /*mm0=mm0-mm5==pix[2]-pix[1]*/
-    "psubw %%mm5,%%mm0\n\t"
-    /*Scale by 3.*/
-    "pmullw %[OC_V3],%%mm0\n\t"
-    /*f=mm0==_pix[0]-_pix[3]+ 3*(_pix[2]-_pix[1])*/
-    "paddw %%mm1,%%mm0\n\t"
-    /*Add 4.*/
-    "paddw %[OC_V4],%%mm0\n\t"
-    /*"Divide" by 8, producing the residuals R_i.*/
-    "psraw $3,%%mm0\n\t"
-    /*Now compute lflim of mm0 cf. Section 7.10 of the spec.*/
-    /*mm6=L L L L*/
-    "movq (%[ll]),%%mm6\n\t"
-    /*if(R_i<-2L||R_i>2L)R_i=0:*/
-    "movq %%mm0,%%mm1\n\t"
-    "pxor %%mm2,%%mm2\n\t"
-    "movq %%mm6,%%mm3\n\t"
-    "psubw %%mm6,%%mm2\n\t"
-    "psllw $1,%%mm3\n\t"
-    "psllw $1,%%mm2\n\t"
-    /*mm0==R_3 R_2 R_1 R_0*/
-    /*mm1==R_3 R_2 R_1 R_0*/
-    /*mm2==-2L -2L -2L -2L*/
-    /*mm3==2L 2L 2L 2L*/
-    "pcmpgtw %%mm0,%%mm3\n\t"
-    "pcmpgtw %%mm2,%%mm1\n\t"
-    "pand %%mm3,%%mm0\n\t"
-    "pand %%mm1,%%mm0\n\t"
-    /*if(R_i<-L)R_i'=R_i+2L;
-      if(R_i>L)R_i'=R_i-2L;
-      if(R_i<-L||R_i>L)R_i=-R_i':*/
-    "psraw $1,%%mm2\n\t"
-    "movq %%mm0,%%mm1\n\t"
-    "movq %%mm6,%%mm3\n\t"
-    /*mm0==R_3 R_2 R_1 R_0*/
-    /*mm1==R_3 R_2 R_1 R_0*/
-    /*mm2==-L -L -L -L*/
-    /*mm6==L L L L*/
-    /*mm2=-L>R_i?FF:00*/
-    "pcmpgtw %%mm0,%%mm2\n\t"
-    /*mm1=R_i>L?FF:00*/
-    "pcmpgtw %%mm6,%%mm1\n\t"
-    /*mm3=2L 2L 2L 2L*/
-    "psllw $1,%%mm3\n\t"
-    /*mm6=2L 2L 2L 2L*/
-    "psllw $1,%%mm6\n\t"
-    /*mm3=R_i>L?2L:0*/
-    "pand %%mm1,%%mm3\n\t"
-    /*mm6=-L>R_i?2L:0*/
-    "pand %%mm2,%%mm6\n\t"
-    /*mm0=R_i>L?R_i-2L:R_i*/
-    "psubw %%mm3,%%mm0\n\t"
-    /*mm1=-L>R_i||R_i>L*/
-    "por %%mm2,%%mm1\n\t"
-    /*mm0=-L>R_i?R_i+2L:R_i*/
-    "paddw %%mm6,%%mm0\n\t"
-    /*mm1=-L>R_i||R_i>L?R_i':0*/
-    "pand %%mm0,%%mm1\n\t"
-    /*mm0=-L>R_i||R_i>L?0:R_i*/
-    "psubw %%mm1,%%mm0\n\t"
-    /*mm0=-L>R_i||R_i>L?-R_i':R_i*/
-    "psubw %%mm1,%%mm0\n\t"
-    /*_pix[1]+=R_i;*/
-    "paddw %%mm0,%%mm5\n\t"
-    /*_pix[2]-=R_i;*/
-    "psubw %%mm0,%%mm4\n\t"
-    /*mm5=x x x x D 9 5 1*/
-    "packuswb %%mm7,%%mm5\n\t"
-    /*mm4=x x x x E A 6 2*/
-    "packuswb %%mm7,%%mm4\n\t"
-    /*mm5=E D A 9 6 5 2 1*/
-    "punpcklbw %%mm4,%%mm5\n\t"
-    /*edi=6 5 2 1*/
-    "movd %%mm5,%%edi\n\t"
-    "movw %%di,1(%[pix])\n\t"
-    /*Why is there such a big stall here?*/
-    "psrlq $32,%%mm5\n\t"
-    "shrl $16,%%edi\n\t"
-    "movw %%di,1(%[pix],%[ystride])\n\t"
-    /*edi=E D A 9*/
-    "movd %%mm5,%%edi\n\t"
-    "movw %%di,1(%[pix],%[ystride],2)\n\t"
-    "shrl $16,%%edi\n\t"
-    "movw %%di,1(%[pix],%[s])\n\t"
-    :[s]"=&S"(esi),[d]"=&D"(edi),
-     [pix]"+r"(_pix),[ystride]"+r"(_ystride),[ll]"+r"(_ll)
-    :[OC_V3]"m"(OC_V3),[OC_V4]"m"(OC_V4)
-    :"memory"
-  );
-}
-
-/*Filters across columns for 8 rows: with _pix on the column right of the
-   edge, the work is done by two loop_filter_h4() calls of four rows each,
-   starting two pixels to the left of _pix.
-  _ll points at four 16-bit copies of the filter limit.*/
-static void loop_filter_h(unsigned char *_pix,int _ystride,
-                          const ogg_int16_t *_ll){
-  _pix-=2;
-  loop_filter_h4(_pix,_ystride,_ll);
-  /*Multiply instead of shifting: left-shifting a negative stride is
-     undefined behavior, and loop_filter_v() already scales by multiplying.*/
-  loop_filter_h4(_pix+_ystride*4,_ystride,_ll);
-}
-
-/*Runs the loop filter over the three planes of pbi->LastFrameRecon.
-  Only fragments whose pbi->display_fragments entry is non-zero are visited;
-   for each one the left and top edges are filtered (when not on the plane
-   border), and the right and bottom edges only when the neighbor on that
-   side has a zero entry.
-  FLimit is the filter limit; nothing is done when it is 0.*/
-static void loop_filter_mmx(PB_INSTANCE *pbi, int FLimit){
-  ogg_int16_t __attribute__((aligned(8)))  ll[4];
-  unsigned char *flag = pbi->display_fragments;
-  ogg_uint32_t *frag = pbi->recon_pixel_index_table;
-  int plane;
-  int i;
-
-  if(FLimit==0)return;
-  /*The asm kernels load the limit as four identical 16-bit words.*/
-  for(i=0;i<4;i++)ll[i]=FLimit;
-
-  for(plane=0;plane<3;plane++){
-    ogg_uint32_t *plane_begin=frag;
-    ogg_uint32_t *plane_end;
-    int stride;
-    int h;
-
-    if(plane==0){
-      /*y*/
-      plane_end=frag+pbi->YPlaneFragments;
-      h=pbi->HFragments;
-      stride=pbi->YStride;
-    }
-    else{
-      /*u,v, 4:2:0 specific*/
-      plane_end=frag+pbi->UVPlaneFragments;
-      h=pbi->HFragments>>1;
-      stride=pbi->UVStride;
-    }
-
-    /*One pass of the outer loop per row of h fragments.*/
-    while(frag<plane_end){
-      ogg_uint32_t *row_begin=frag;
-      ogg_uint32_t *row_end=frag+h;
-      for(;frag<row_end;frag++,flag++){
-        if(!flag[0])continue;
-        /*Left edge, unless this is the first fragment of the row.*/
-        if(frag>row_begin)
-          loop_filter_h(&pbi->LastFrameRecon[frag[0]],stride,ll);
-        /*Top edge, unless this is the first row of the plane.*/
-        if(row_begin>plane_begin)
-          loop_filter_v(&pbi->LastFrameRecon[frag[0]],stride,ll);
-        /*Right edge, only if the right neighbor will not filter it.*/
-        if(frag+1<row_end && !flag[1])
-          loop_filter_h(&pbi->LastFrameRecon[frag[0]]+8,stride,ll);
-        /*Bottom edge, only if the fragment below will not filter it.*/
-        if(frag+h<plane_end && !flag[h])
-          loop_filter_v(&pbi->LastFrameRecon[frag[h]],stride,ll);
-      }
-    }
-  }
-
-  __asm__ __volatile__("emms\n\t");
-}
-
-/* Registers loop_filter_mmx as the LoopFilter entry of the function table. */
-void dsp_mmx_dct_decode_init(DspFunctions *funcs) {
-  funcs->LoopFilter = loop_filter_mmx;
-}
-
-#endif /* USE_ASM */

+ 0 - 303
Engine/lib/libtheora/lib/enc/x86_64/dsp_mmx.c

@@ -1,303 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: dsp_mmx.c 15397 2008-10-14 02:06:24Z tterribe $
-
- ********************************************************************/
-
-#include <stdlib.h>
-
-#include "../codec_internal.h"
-#include "../dsp.h"
-
-#if defined(USE_ASM)
-
-typedef unsigned long long ogg_uint64_t;
-
-static const __attribute__ ((aligned(8),used)) ogg_int64_t V128 = 0x0080008000800080LL;
-
-#define DSP_OP_AVG(a,b) ((((int)(a)) + ((int)(b)))/2)
-#define DSP_OP_DIFF(a,b) (((int)(a)) - ((int)(b)))
-#define DSP_OP_ABS_DIFF(a,b) abs((((int)(a)) - ((int)(b))))
-
-static void sub8x8__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr,
-                  ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine,
-                  ogg_uint32_t ReconPixelsPerLine)
-{
-  __asm__ __volatile__ (
-    "  .balign 16                   \n\t"
-
-    "  pxor        %%mm7, %%mm7     \n\t"
-
-    ".rept 8                        \n\t"
-    "  movq        (%0), %%mm0      \n\t" /* mm0 = FiltPtr */
-    "  movq        (%1), %%mm1      \n\t" /* mm1 = ReconPtr */
-    "  movq        %%mm0, %%mm2     \n\t" /* dup to prepare for up conversion */
-    "  movq        %%mm1, %%mm3     \n\t" /* dup to prepare for up conversion */
-    /* convert from UINT8 to INT16 */
-    "  punpcklbw   %%mm7, %%mm0     \n\t" /* mm0 = INT16(FiltPtr) */
-    "  punpcklbw   %%mm7, %%mm1     \n\t" /* mm1 = INT16(ReconPtr) */
-    "  punpckhbw   %%mm7, %%mm2     \n\t" /* mm2 = INT16(FiltPtr) */
-    "  punpckhbw   %%mm7, %%mm3     \n\t" /* mm3 = INT16(ReconPtr) */
-    /* start calculation */
-    "  psubw       %%mm1, %%mm0     \n\t" /* mm0 = FiltPtr - ReconPtr */
-    "  psubw       %%mm3, %%mm2     \n\t" /* mm2 = FiltPtr - ReconPtr */
-    "  movq        %%mm0,  (%2)     \n\t" /* write answer out */
-    "  movq        %%mm2, 8(%2)     \n\t" /* write answer out */
-    /* Increment pointers */
-    "  add         $16, %2          \n\t"
-    "  add         %3, %0           \n\t"
-    "  add         %4, %1           \n\t"
-    ".endr                          \n\t"
-
-     : "+r" (FiltPtr),
-       "+r" (ReconPtr),
-       "+r" (DctInputPtr)
-     : "r" ((ogg_uint64_t)PixelsPerLine),
-       "r" ((ogg_uint64_t)ReconPixelsPerLine)
-     : "memory"
-  );
-}
-
-static void sub8x8_128__mmx (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr,
-                      ogg_uint32_t PixelsPerLine)
-{
-  ogg_uint64_t ppl = PixelsPerLine;
-
-  __asm__ __volatile__ (
-    "  .balign 16                   \n\t"
-
-    "  pxor        %%mm7, %%mm7     \n\t"
-    "  movq        %[V128], %%mm1   \n\t"
-
-    ".rept 8                        \n\t"
-    "  movq        (%0), %%mm0      \n\t" /* mm0 = FiltPtr */
-    "  movq        %%mm0, %%mm2     \n\t" /* dup to prepare for up conversion */
-    /* convert from UINT8 to INT16 */
-    "  punpcklbw   %%mm7, %%mm0     \n\t" /* mm0 = INT16(FiltPtr) */
-    "  punpckhbw   %%mm7, %%mm2     \n\t" /* mm2 = INT16(FiltPtr) */
-    /* start calculation */
-    "  psubw       %%mm1, %%mm0     \n\t" /* mm0 = FiltPtr - 128 */
-    "  psubw       %%mm1, %%mm2     \n\t" /* mm2 = FiltPtr - 128 */
-    "  movq        %%mm0,  (%1)     \n\t" /* write answer out */
-    "  movq        %%mm2, 8(%1)     \n\t" /* write answer out */
-    /* Increment pointers */
-    "  add         $16, %1           \n\t"
-    "  add         %2, %0           \n\t"
-    ".endr                          \n\t"
-
-     : "+r" (FiltPtr),
-       "+r" (DctInputPtr)
-     : "r" (ppl), /* gcc bug? a cast won't work here, e.g. (ogg_uint64_t)PixelsPerLine */
-       [V128] "m" (V128)
-     : "memory"
-  );
-}
-
-static void sub8x8avg2__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr1,
-                     unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr,
-                     ogg_uint32_t PixelsPerLine,
-                     ogg_uint32_t ReconPixelsPerLine)
-{
-  __asm__ __volatile__ (
-    "  .balign 16                   \n\t"
-
-    "  pxor        %%mm7, %%mm7     \n\t"
-
-    ".rept 8                        \n\t"
-    "  movq        (%0), %%mm0      \n\t" /* mm0 = FiltPtr */
-    "  movq        (%1), %%mm1      \n\t" /* mm1 = ReconPtr1 */
-    "  movq        (%2), %%mm4      \n\t" /* mm1 = ReconPtr2 */
-    "  movq        %%mm0, %%mm2     \n\t" /* dup to prepare for up conversion */
-    "  movq        %%mm1, %%mm3     \n\t" /* dup to prepare for up conversion */
-    "  movq        %%mm4, %%mm5     \n\t" /* dup to prepare for up conversion */
-    /* convert from UINT8 to INT16 */
-    "  punpcklbw   %%mm7, %%mm0     \n\t" /* mm0 = INT16(FiltPtr) */
-    "  punpcklbw   %%mm7, %%mm1     \n\t" /* mm1 = INT16(ReconPtr1) */
-    "  punpcklbw   %%mm7, %%mm4     \n\t" /* mm1 = INT16(ReconPtr2) */
-    "  punpckhbw   %%mm7, %%mm2     \n\t" /* mm2 = INT16(FiltPtr) */
-    "  punpckhbw   %%mm7, %%mm3     \n\t" /* mm3 = INT16(ReconPtr1) */
-    "  punpckhbw   %%mm7, %%mm5     \n\t" /* mm3 = INT16(ReconPtr2) */
-    /* average ReconPtr1 and ReconPtr2 */
-    "  paddw       %%mm4, %%mm1     \n\t" /* mm1 = ReconPtr1 + ReconPtr2 */
-    "  paddw       %%mm5, %%mm3     \n\t" /* mm3 = ReconPtr1 + ReconPtr2 */
-    "  psrlw       $1, %%mm1        \n\t" /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
-    "  psrlw       $1, %%mm3        \n\t" /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
-    "  psubw       %%mm1, %%mm0     \n\t" /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
-    "  psubw       %%mm3, %%mm2     \n\t" /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
-    "  movq        %%mm0,  (%3)     \n\t" /* write answer out */
-    "  movq        %%mm2, 8(%3)     \n\t" /* write answer out */
-    /* Increment pointers */
-    "  add         $16, %3           \n\t"
-    "  add         %4, %0           \n\t"
-    "  add         %5, %1           \n\t"
-    "  add         %5, %2           \n\t"
-    ".endr                          \n\t"
-
-     : "+r" (FiltPtr),
-       "+r" (ReconPtr1),
-       "+r" (ReconPtr2),
-       "+r" (DctInputPtr)
-     : "r" ((ogg_uint64_t)PixelsPerLine),
-       "r" ((ogg_uint64_t)ReconPixelsPerLine)
-     : "memory"
-  );
-}
-
-static ogg_uint32_t intra8x8_err__mmx (unsigned char *DataPtr, ogg_uint32_t Stride)
-{
-  ogg_uint64_t  XSum;
-  ogg_uint64_t  XXSum;
-
-  __asm__ __volatile__ (
-    "  .balign 16                   \n\t"
-
-    "  pxor        %%mm5, %%mm5     \n\t"
-    "  pxor        %%mm6, %%mm6     \n\t"
-    "  pxor        %%mm7, %%mm7     \n\t"
-    "  mov         $8, %%rdi        \n\t"
-    "1:                             \n\t"
-    "  movq        (%2), %%mm0      \n\t"       /* take 8 bytes */
-    "  movq        %%mm0, %%mm2     \n\t"
-
-    "  punpcklbw   %%mm6, %%mm0     \n\t"
-    "  punpckhbw   %%mm6, %%mm2     \n\t"
-
-    "  paddw       %%mm0, %%mm5     \n\t"
-    "  paddw       %%mm2, %%mm5     \n\t"
-
-    "  pmaddwd     %%mm0, %%mm0     \n\t"
-    "  pmaddwd     %%mm2, %%mm2     \n\t"
-
-    "  paddd       %%mm0, %%mm7     \n\t"
-    "  paddd       %%mm2, %%mm7     \n\t"
-
-    "  add         %3, %2           \n\t"       /* Inc pointer into src data */
-
-    "  dec         %%rdi            \n\t"
-    "  jnz 1b                       \n\t"
-
-    "  movq        %%mm5, %%mm0     \n\t"
-    "  psrlq       $32, %%mm5       \n\t"
-    "  paddw       %%mm0, %%mm5     \n\t"
-    "  movq        %%mm5, %%mm0     \n\t"
-    "  psrlq       $16, %%mm5       \n\t"
-    "  paddw       %%mm0, %%mm5     \n\t"
-    "  movd        %%mm5, %%rdi     \n\t"
-    "  movsx       %%di, %%rdi      \n\t"
-    "  mov         %%rdi, %0        \n\t"
-
-    "  movq        %%mm7, %%mm0     \n\t"
-    "  psrlq       $32, %%mm7       \n\t"
-    "  paddd       %%mm0, %%mm7     \n\t"
-    "  movd        %%mm7, %1        \n\t"
-
-     : "=r" (XSum),
-       "=r" (XXSum),
-       "+r" (DataPtr)
-     : "r" ((ogg_uint64_t)Stride)
-     : "rdi", "memory"
-  );
-
-  /* Compute population variance as mis-match metric. */
-  return (( (XXSum<<6) - XSum*XSum ) );
-}
-
-static ogg_uint32_t inter8x8_err__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
-                                 unsigned char *RefDataPtr, ogg_uint32_t RefStride)
-{
-  ogg_uint64_t  XSum;
-  ogg_uint64_t  XXSum;
-
-  __asm__ __volatile__ (
-    "  .balign 16                   \n\t"
-
-    "  pxor        %%mm5, %%mm5     \n\t"
-    "  pxor        %%mm6, %%mm6     \n\t"
-    "  pxor        %%mm7, %%mm7     \n\t"
-    "  mov         $8, %%rdi        \n\t"
-    "1:                             \n\t"
-    "  movq        (%2), %%mm0      \n\t"       /* take 8 bytes */
-    "  movq        (%3), %%mm1      \n\t"
-    "  movq        %%mm0, %%mm2     \n\t"
-    "  movq        %%mm1, %%mm3     \n\t"
-
-    "  punpcklbw   %%mm6, %%mm0     \n\t"
-    "  punpcklbw   %%mm6, %%mm1     \n\t"
-    "  punpckhbw   %%mm6, %%mm2     \n\t"
-    "  punpckhbw   %%mm6, %%mm3     \n\t"
-
-    "  psubsw      %%mm1, %%mm0     \n\t"
-    "  psubsw      %%mm3, %%mm2     \n\t"
-
-    "  paddw       %%mm0, %%mm5     \n\t"
-    "  paddw       %%mm2, %%mm5     \n\t"
-
-    "  pmaddwd     %%mm0, %%mm0     \n\t"
-    "  pmaddwd     %%mm2, %%mm2     \n\t"
-
-    "  paddd       %%mm0, %%mm7     \n\t"
-    "  paddd       %%mm2, %%mm7     \n\t"
-
-    "  add         %4, %2           \n\t"       /* Inc pointer into src data */
-    "  add         %5, %3           \n\t"       /* Inc pointer into ref data */
-
-    "  dec         %%rdi            \n\t"
-    "  jnz 1b                       \n\t"
-
-    "  movq        %%mm5, %%mm0     \n\t"
-    "  psrlq       $32, %%mm5       \n\t"
-    "  paddw       %%mm0, %%mm5     \n\t"
-    "  movq        %%mm5, %%mm0     \n\t"
-    "  psrlq       $16, %%mm5       \n\t"
-    "  paddw       %%mm0, %%mm5     \n\t"
-    "  movd        %%mm5, %%rdi     \n\t"
-    "  movsx       %%di, %%rdi      \n\t"
-    "  mov         %%rdi, %0        \n\t"
-
-    "  movq        %%mm7, %%mm0     \n\t"
-    "  psrlq       $32, %%mm7       \n\t"
-    "  paddd       %%mm0, %%mm7     \n\t"
-    "  movd        %%mm7, %1        \n\t"
-
-     : "=m" (XSum),
-       "=m" (XXSum),
-       "+r" (SrcData),
-       "+r" (RefDataPtr)
-     : "r" ((ogg_uint64_t)SrcStride),
-       "r" ((ogg_uint64_t)RefStride)
-     : "rdi", "memory"
-  );
-
-  /* Compute and return population variance as mis-match metric. */
-  return (( (XXSum<<6) - XSum*XSum ));
-}
-
-static void restore_fpu (void)
-{
-  __asm__ __volatile__ (
-    "  emms                         \n\t"
-  );
-}
-
-void dsp_mmx_init(DspFunctions *funcs)
-{
-  funcs->restore_fpu = restore_fpu;
-  funcs->sub8x8 = sub8x8__mmx;
-  funcs->sub8x8_128 = sub8x8_128__mmx;
-  funcs->sub8x8avg2 = sub8x8avg2__mmx;
-  funcs->intra8x8_err = intra8x8_err__mmx;
-  funcs->inter8x8_err = inter8x8_err__mmx;
-}
-
-#endif /* USE_ASM */

+ 0 - 323
Engine/lib/libtheora/lib/enc/x86_64/dsp_mmxext.c

@@ -1,323 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: dsp_mmxext.c 15397 2008-10-14 02:06:24Z tterribe $
-
- ********************************************************************/
-
-#include <stdlib.h>
-
-#include "../codec_internal.h"
-#include "../dsp.h"
-
-#if defined(USE_ASM)
-
-typedef unsigned long long ogg_uint64_t;
-
-static ogg_uint32_t sad8x8__mmxext (unsigned char *ptr1, ogg_uint32_t stride1,
-                                    unsigned char *ptr2, ogg_uint32_t stride2)
-{
-  ogg_uint32_t  DiffVal;
-
-  __asm__ __volatile__ (
-    "  .balign 16                   \n\t"
-    "  pxor %%mm7, %%mm7            \n\t"       /* mm7 contains the result */
-
-    ".rept 7                        \n\t"
-    "  movq (%1), %%mm0             \n\t"       /* take 8 bytes */
-    "  movq (%2), %%mm1             \n\t"
-    "  psadbw %%mm1, %%mm0          \n\t"
-    "  add %3, %1                   \n\t"       /* Inc pointer into the new data */
-    "  paddw %%mm0, %%mm7           \n\t"       /* accumulate difference... */
-    "  add %4, %2                   \n\t"       /* Inc pointer into ref data */
-    ".endr                          \n\t"
-
-    "  movq (%1), %%mm0             \n\t"       /* take 8 bytes */
-    "  movq (%2), %%mm1             \n\t"
-    "  psadbw %%mm1, %%mm0          \n\t"
-    "  paddw %%mm0, %%mm7           \n\t"       /* accumulate difference... */
-    "  movd %%mm7, %0               \n\t"
-
-     : "=r" (DiffVal),
-       "+r" (ptr1),
-       "+r" (ptr2)
-     : "r" ((ogg_uint64_t)stride1),
-       "r" ((ogg_uint64_t)stride2)
-     : "memory"
-  );
-
-  return DiffVal;
-}
-
-static ogg_uint32_t sad8x8_thres__mmxext (unsigned char *ptr1, ogg_uint32_t stride1,
-                                          unsigned char *ptr2, ogg_uint32_t stride2,
-                                  ogg_uint32_t thres)
-{
-  ogg_uint32_t  DiffVal;
-
-  __asm__ __volatile__ (
-    "  .balign 16                   \n\t"
-    "  pxor %%mm7, %%mm7            \n\t"       /* mm7 contains the result */
-
-    ".rept 8                        \n\t"
-    "  movq (%1), %%mm0             \n\t"       /* take 8 bytes */
-    "  movq (%2), %%mm1             \n\t"
-    "  psadbw %%mm1, %%mm0          \n\t"
-    "  add %3, %1                   \n\t"       /* Inc pointer into the new data */
-    "  paddw %%mm0, %%mm7           \n\t"       /* accumulate difference... */
-    "  add %4, %2                   \n\t"       /* Inc pointer into ref data */
-    ".endr                          \n\t"
-
-    "  movd %%mm7, %0               \n\t"
-
-     : "=r" (DiffVal),
-       "+r" (ptr1),
-       "+r" (ptr2)
-     : "r" ((ogg_uint64_t)stride1),
-       "r" ((ogg_uint64_t)stride2)
-     : "memory"
-  );
-
-  return DiffVal;
-}
-
-static ogg_uint32_t sad8x8_xy2_thres__mmxext (unsigned char *SrcData, ogg_uint32_t SrcStride,
-                                              unsigned char *RefDataPtr1,
-                                              unsigned char *RefDataPtr2, ogg_uint32_t RefStride,
-                                              ogg_uint32_t thres)
-{
-  ogg_uint32_t  DiffVal;
-
-  __asm__ __volatile__ (
-    "  .balign 16                   \n\t"
-    "  pxor %%mm7, %%mm7            \n\t"       /* mm7 contains the result */
-    ".rept 8                        \n\t"
-    "  movq (%1), %%mm0             \n\t"       /* take 8 bytes */
-    "  movq (%2), %%mm1             \n\t"
-    "  movq (%3), %%mm2             \n\t"
-    "  pavgb %%mm2, %%mm1           \n\t"
-    "  psadbw %%mm1, %%mm0          \n\t"
-
-    "  add %4, %1                   \n\t"       /* Inc pointer into the new data */
-    "  paddw %%mm0, %%mm7           \n\t"       /* accumulate difference... */
-    "  add %5, %2                   \n\t"       /* Inc pointer into ref data */
-    "  add %5, %3                   \n\t"       /* Inc pointer into ref data */
-    ".endr                          \n\t"
-
-    "  movd %%mm7, %0               \n\t"
-     : "=m" (DiffVal),
-       "+r" (SrcData),
-       "+r" (RefDataPtr1),
-       "+r" (RefDataPtr2)
-     : "r" ((ogg_uint64_t)SrcStride),
-       "r" ((ogg_uint64_t)RefStride)
-     : "memory"
-  );
-
-  return DiffVal;
-}
-
-static ogg_uint32_t row_sad8__mmxext (unsigned char *Src1, unsigned char *Src2)
-{
-  ogg_uint32_t MaxSad;
-
-  __asm__ __volatile__ (
-    "  .balign 16                   \n\t"
-
-    "  movd        (%1), %%mm0      \n\t"
-    "  movd        (%2), %%mm1      \n\t"
-    "  psadbw      %%mm0, %%mm1     \n\t"
-    "  movd        4(%1), %%mm2     \n\t"
-    "  movd        4(%2), %%mm3     \n\t"
-    "  psadbw      %%mm2, %%mm3     \n\t"
-
-    "  pmaxsw      %%mm1, %%mm3     \n\t"
-    "  movd        %%mm3, %0        \n\t"
-    "  andl        $0xffff, %0      \n\t"
-
-     : "=m" (MaxSad),
-       "+r" (Src1),
-       "+r" (Src2)
-     :
-     : "memory"
-  );
-
-  return MaxSad;
-}
-
-static ogg_uint32_t col_sad8x8__mmxext (unsigned char *Src1, unsigned char *Src2,
-                                    ogg_uint32_t stride)
-{
-  ogg_uint32_t MaxSad;
-
-  __asm__ __volatile__ (
-    "  .balign 16                   \n\t"
-
-    "  pxor        %%mm3, %%mm3     \n\t"       /* zero out mm3 for unpack */
-    "  pxor        %%mm4, %%mm4     \n\t"       /* mm4 low sum */
-    "  pxor        %%mm5, %%mm5     \n\t"       /* mm5 high sum */
-    "  pxor        %%mm6, %%mm6     \n\t"       /* mm6 low sum */
-    "  pxor        %%mm7, %%mm7     \n\t"       /* mm7 high sum */
-    "  mov         $4, %%rdi        \n\t"       /* 4 rows */
-    "1:                             \n\t"
-    "  movq        (%1), %%mm0      \n\t"       /* take 8 bytes */
-    "  movq        (%2), %%mm1      \n\t"       /* take 8 bytes */
-
-    "  movq        %%mm0, %%mm2     \n\t"
-    "  psubusb     %%mm1, %%mm0     \n\t"       /* A - B */
-    "  psubusb     %%mm2, %%mm1     \n\t"       /* B - A */
-    "  por         %%mm1, %%mm0     \n\t"       /* and or gives abs difference */
-    "  movq        %%mm0, %%mm1     \n\t"
-
-    "  punpcklbw   %%mm3, %%mm0     \n\t"       /* unpack to higher precision for accumulation */
-    "  paddw       %%mm0, %%mm4     \n\t"       /* accumulate difference... */
-    "  punpckhbw   %%mm3, %%mm1     \n\t"       /* unpack high four bytes to higher precision */
-    "  paddw       %%mm1, %%mm5     \n\t"       /* accumulate difference... */
-    "  add         %3, %1           \n\t"       /* Inc pointer into the new data */
-    "  add         %3, %2           \n\t"       /* Inc pointer into the new data */
-
-    "  dec         %%rdi            \n\t"
-    "  jnz 1b                       \n\t"
-
-    "  mov         $4, %%rdi        \n\t"       /* 4 rows */
-    "2:                             \n\t"
-    "  movq        (%1), %%mm0      \n\t"       /* take 8 bytes */
-    "  movq        (%2), %%mm1      \n\t"       /* take 8 bytes */
-
-    "  movq        %%mm0, %%mm2     \n\t"
-    "  psubusb     %%mm1, %%mm0     \n\t"       /* A - B */
-    "  psubusb     %%mm2, %%mm1     \n\t"       /* B - A */
-    "  por         %%mm1, %%mm0     \n\t"       /* and or gives abs difference */
-    "  movq        %%mm0, %%mm1     \n\t"
-
-    "  punpcklbw   %%mm3, %%mm0     \n\t"       /* unpack to higher precision for accumulation */
-    "  paddw       %%mm0, %%mm6     \n\t"       /* accumulate difference... */
-    "  punpckhbw   %%mm3, %%mm1     \n\t"       /* unpack high four bytes to higher precision */
-    "  paddw       %%mm1, %%mm7     \n\t"       /* accumulate difference... */
-    "  add         %3, %1           \n\t"       /* Inc pointer into the new data */
-    "  add         %3, %2           \n\t"       /* Inc pointer into the new data */
-
-    "  dec         %%rdi            \n\t"
-    "  jnz 2b                       \n\t"
-
-    "  pmaxsw      %%mm6, %%mm7     \n\t"
-    "  pmaxsw      %%mm4, %%mm5     \n\t"
-    "  pmaxsw      %%mm5, %%mm7     \n\t"
-    "  movq        %%mm7, %%mm6     \n\t"
-    "  psrlq       $32, %%mm6       \n\t"
-    "  pmaxsw      %%mm6, %%mm7     \n\t"
-    "  movq        %%mm7, %%mm6     \n\t"
-    "  psrlq       $16, %%mm6       \n\t"
-    "  pmaxsw      %%mm6, %%mm7     \n\t"
-    "  movd        %%mm7, %0        \n\t"
-    "  andl        $0xffff, %0      \n\t"
-
-     : "=r" (MaxSad),
-       "+r" (Src1),
-       "+r" (Src2)
-     : "r" ((ogg_uint64_t)stride)
-     : "memory", "rdi"
-  );
-
-  return MaxSad;
-}
-
-static ogg_uint32_t inter8x8_err_xy2__mmxext (unsigned char *SrcData, ogg_uint32_t SrcStride,
-                                              unsigned char *RefDataPtr1,
-                                              unsigned char *RefDataPtr2, ogg_uint32_t RefStride)
-{
-  ogg_uint64_t XSum;
-  ogg_uint64_t XXSum;
-
-  __asm__ __volatile__ (
-    "  .balign 16                   \n\t"
-
-    "  pxor        %%mm4, %%mm4     \n\t"
-    "  pxor        %%mm5, %%mm5     \n\t"
-    "  pxor        %%mm6, %%mm6     \n\t"
-    "  pxor        %%mm7, %%mm7     \n\t"
-    "  mov         $8, %%rdi        \n\t"
-    "1:                             \n\t"
-    "  movq        (%2), %%mm0      \n\t"       /* take 8 bytes */
-
-    "  movq        (%3), %%mm2      \n\t"
-    "  movq        (%4), %%mm1      \n\t"       /* take average of mm2 and mm1 */
-    "  pavgb       %%mm2, %%mm1     \n\t"
-
-    "  movq        %%mm0, %%mm2     \n\t"
-    "  movq        %%mm1, %%mm3     \n\t"
-
-    "  punpcklbw   %%mm6, %%mm0     \n\t"
-    "  punpcklbw   %%mm4, %%mm1     \n\t"
-    "  punpckhbw   %%mm6, %%mm2     \n\t"
-    "  punpckhbw   %%mm4, %%mm3     \n\t"
-
-    "  psubsw      %%mm1, %%mm0     \n\t"
-    "  psubsw      %%mm3, %%mm2     \n\t"
-
-    "  paddw       %%mm0, %%mm5     \n\t"
-    "  paddw       %%mm2, %%mm5     \n\t"
-
-    "  pmaddwd     %%mm0, %%mm0     \n\t"
-    "  pmaddwd     %%mm2, %%mm2     \n\t"
-
-    "  paddd       %%mm0, %%mm7     \n\t"
-    "  paddd       %%mm2, %%mm7     \n\t"
-
-    "  add         %5, %2           \n\t"       /* Inc pointer into src data */
-    "  add         %6, %3           \n\t"       /* Inc pointer into ref data */
-    "  add         %6, %4           \n\t"       /* Inc pointer into ref data */
-
-    "  dec         %%rdi            \n\t"
-    "  jnz 1b                       \n\t"
-
-    "  movq        %%mm5, %%mm0     \n\t"
-    "  psrlq       $32, %%mm5       \n\t"
-    "  paddw       %%mm0, %%mm5     \n\t"
-    "  movq        %%mm5, %%mm0     \n\t"
-    "  psrlq       $16, %%mm5       \n\t"
-    "  paddw       %%mm0, %%mm5     \n\t"
-    "  movd        %%mm5, %%edi     \n\t"
-    "  movsx       %%di, %%edi      \n\t"
-    "  movl        %%edi, %0        \n\t"
-
-    "  movq        %%mm7, %%mm0     \n\t"
-    "  psrlq       $32, %%mm7       \n\t"
-    "  paddd       %%mm0, %%mm7     \n\t"
-    "  movd        %%mm7, %1        \n\t"
-
-     : "=m" (XSum),
-       "=m" (XXSum),
-       "+r" (SrcData),
-       "+r" (RefDataPtr1),
-       "+r" (RefDataPtr2)
-     : "r" ((ogg_uint64_t)SrcStride),
-       "r" ((ogg_uint64_t)RefStride)
-     : "rdi", "memory"
-  );
-
-  /* Compute and return population variance as mis-match metric. */
-  return (( (XXSum<<6) - XSum*XSum ));
-}
-
-void dsp_mmxext_init(DspFunctions *funcs)
-{
-  funcs->row_sad8 = row_sad8__mmxext;
-  funcs->col_sad8x8 = col_sad8x8__mmxext;
-  funcs->sad8x8 = sad8x8__mmxext;
-  funcs->sad8x8_thres = sad8x8_thres__mmxext;
-  funcs->sad8x8_xy2_thres = sad8x8_xy2_thres__mmxext;
-  funcs->inter8x8_err_xy2 = inter8x8_err_xy2__mmxext;
-}
-
-#endif /* USE_ASM */

+ 0 - 342
Engine/lib/libtheora/lib/enc/x86_64/fdct_mmx.c

@@ -1,342 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 1999-2006                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************/
-
-/* mmx fdct implementation for x86_64 */
-/* $Id: fdct_mmx.c 15397 2008-10-14 02:06:24Z tterribe $ */
-
-#include "theora/theora.h"
-#include "../codec_internal.h"
-#include "../dsp.h"
-
-#if defined(USE_ASM)
-/* Multiplier constants for the pmulhw instructions in Fdct_mmx below.
-   Each 64-bit value holds one 16-bit word repeated in all four lanes; the
-   extra leading hex digit 0 in each literal does not change the value.
-   The words equal cos(k pi/16) scaled by 65536 and rounded, for k = 1..7
-   (for example 0xb505 = 46341 for k = 4).
-   NOTE(review): the xCkS(8-k) names presumably abbreviate
-   "cos k / sin (8-k)" -- confirm against the reference C implementation.
-   aligned(8) gives each constant 8-byte alignment; the `used` attribute
-   asks the compiler to emit it even if it looks unreferenced. */
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC1S7 = 0x0fb15fb15fb15fb15LL;
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC2S6 = 0x0ec83ec83ec83ec83LL;
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC3S5 = 0x0d4dbd4dbd4dbd4dbLL;
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC4S4 = 0x0b505b505b505b505LL;
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC5S3 = 0x08e3a8e3a8e3a8e3aLL;
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC6S2 = 0x061f861f861f861f8LL;
-static const __attribute__ ((aligned(8),used)) ogg_int64_t xC7S1 = 0x031f131f131f131f1LL;
-/* M(a) turns the identifier a into a string literal holding its name,
-   prefixed with an underscore on MinGW, Cygwin, OS/2 and non-ELF OpenBSD
-   and unchanged elsewhere.
-   Nothing else in this file references it. */
-#if defined(__MINGW32__) || defined(__CYGWIN__) || \
-    defined(__OS2__) || (defined (__OpenBSD__) && !defined(__ELF__))
-# define M(a) "_" #a
-#else
-# define M(a) #a
-#endif
-
-/* execute stage 1 of forward DCT
-   Expands to the assembly text for one forward DCT pass.
-   ip0..ip7 are memory operands, written as asm address expressions, that
-   hold the eight inputs (four 16-bit lanes each); the eight results are
-   stored back over the same operands, so the pass works in place.
-   temp is a scratch memory operand used to park is07 - is34.
-   All eight MMX registers mm0..mm7 are overwritten.
-   The enclosing asm statement must supply the named operands
-   %[xC1S7] .. %[xC7S1]. */
-#define Fdct_mmx(ip0,ip1,ip2,ip3,ip4,ip5,ip6,ip7,temp)                        \
-  "  movq      " #ip0 ", %%mm0      \n\t"                                     \
-  "  movq      " #ip1 ", %%mm1      \n\t"                                     \
-  "  movq      " #ip3 ", %%mm2      \n\t"                                     \
-  "  movq      " #ip5 ", %%mm3      \n\t"                                     \
-  "  movq        %%mm0, %%mm4       \n\t"                                     \
-  "  movq        %%mm1, %%mm5       \n\t"                                     \
-  "  movq        %%mm2, %%mm6       \n\t"                                     \
-  "  movq        %%mm3, %%mm7       \n\t"                                     \
-                                                                              \
-  "  paddsw    " #ip7 ", %%mm0      \n\t" /* mm0 = ip0 + ip7 = is07 */        \
-  "  paddsw    " #ip2 ", %%mm1      \n\t" /* mm1 = ip1 + ip2 = is12 */        \
-  "  paddsw    " #ip4 ", %%mm2      \n\t" /* mm2 = ip3 + ip4 = is34 */        \
-  "  paddsw    " #ip6 ", %%mm3      \n\t" /* mm3 = ip5 + ip6 = is56 */        \
-  "  psubsw    " #ip7 ", %%mm4      \n\t" /* mm4 = ip0 - ip7 = id07 */        \
-  "  psubsw    " #ip2 ", %%mm5      \n\t" /* mm5 = ip1 - ip2 = id12 */        \
-                                                                              \
-  "  psubsw      %%mm2, %%mm0       \n\t" /* mm0 = is07 - is34 */             \
-                                                                              \
-  "  paddsw      %%mm2, %%mm2       \n\t"                                     \
-                                                                              \
-  "  psubsw    " #ip4 ", %%mm6      \n\t" /* mm6 = ip3 - ip4 = id34 */        \
-                                                                              \
-  "  paddsw      %%mm0, %%mm2       \n\t" /* mm2 = is07 + is34 = is0734 */    \
-  "  psubsw      %%mm3, %%mm1       \n\t" /* mm1 = is12 - is56 */             \
-  "  movq        %%mm0," #temp "    \n\t" /* Save is07 - is34 to free mm0; */ \
-  "  paddsw      %%mm3, %%mm3       \n\t"                                     \
-  "  paddsw      %%mm1, %%mm3       \n\t" /* mm3 = is12 + is56  = is1256 */   \
-                                                                              \
-  "  psubsw    " #ip6 ", %%mm7      \n\t" /* mm7 = ip5 - ip6 = id56 */        \
-  /* ------------------------------------------------------------------- */   \
-  "  psubsw      %%mm7, %%mm5       \n\t" /* mm5 = id12 - id56 */             \
-  "  paddsw      %%mm7, %%mm7       \n\t"                                     \
-  "  paddsw      %%mm5, %%mm7       \n\t" /* mm7 = id12 + id56 */             \
-  /* ------------------------------------------------------------------- */   \
-  "  psubsw      %%mm3, %%mm2       \n\t" /* mm2 = is0734 - is1256 */         \
-  "  paddsw      %%mm3, %%mm3       \n\t"                                     \
-                                                                              \
-  "  movq        %%mm2, %%mm0       \n\t" /* make a copy */                   \
-  "  paddsw      %%mm2, %%mm3       \n\t" /* mm3 = is0734 + is1256 */         \
-                                                                              \
-  "  pmulhw      %[xC4S4], %%mm0    \n\t" /* mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */ \
-  "  paddw       %%mm2, %%mm0       \n\t" /* mm0 = xC4S4 * ( is0734 - is1256 ) */ \
-  "  psrlw       $15, %%mm2         \n\t"                                     \
-  "  paddw       %%mm2, %%mm0       \n\t" /* Truncate mm0, now it is op[4] */ \
-                                                                              \
-  "  movq        %%mm3, %%mm2       \n\t"                                     \
-  "  movq        %%mm0," #ip4 "     \n\t" /* save ip4, now mm0,mm2 are free */ \
-                                                                              \
-  "  movq        %%mm3, %%mm0       \n\t"                                     \
-  "  pmulhw      %[xC4S4], %%mm3    \n\t" /* mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */ \
-                                                                              \
-  "  psrlw       $15, %%mm2         \n\t"                                     \
-  "  paddw       %%mm0, %%mm3       \n\t" /* mm3 = xC4S4 * ( is0734 +is1256 )    */ \
-  "  paddw       %%mm2, %%mm3       \n\t" /* Truncate mm3, now it is op[0] */ \
-                                                                              \
-  "  movq        %%mm3," #ip0 "     \n\t"                                     \
-  /* ------------------------------------------------------------------- */   \
-  "  movq      " #temp ", %%mm3     \n\t" /* mm3 = irot_input_y */            \
-  "  pmulhw      %[xC2S6], %%mm3     \n\t" /* mm3 = xC2S6 * irot_input_y - irot_input_y */ \
-                                                                              \
-  "  movq      " #temp ", %%mm2     \n\t"                                     \
-  "  movq        %%mm2, %%mm0       \n\t"                                     \
-                                                                              \
-  "  psrlw       $15, %%mm2         \n\t" /* mm3 = xC2S6 * irot_input_y */    \
-  "  paddw       %%mm0, %%mm3       \n\t"                                     \
-                                                                              \
-  "  paddw       %%mm2, %%mm3       \n\t" /* Truncated */                     \
-  "  movq        %%mm5, %%mm0       \n\t"                                     \
-                                                                              \
-  "  movq        %%mm5, %%mm2       \n\t"                                     \
-  "  pmulhw      %[xC6S2], %%mm0    \n\t" /* mm0 = xC6S2 * irot_input_x */    \
-                                                                              \
-  "  psrlw       $15, %%mm2         \n\t"                                     \
-  "  paddw       %%mm2, %%mm0       \n\t" /* Truncated */                     \
-                                                                              \
-  "  paddsw      %%mm0, %%mm3       \n\t" /* ip[2] */                         \
-  "  movq        %%mm3," #ip2 "     \n\t" /* Save ip2 */                      \
-                                                                              \
-  "  movq        %%mm5, %%mm0       \n\t"                                     \
-  "  movq        %%mm5, %%mm2       \n\t"                                     \
-                                                                              \
-  "  pmulhw      %[xC2S6], %%mm5     \n\t" /* mm5 = xC2S6 * irot_input_x - irot_input_x */ \
-  "  psrlw       $15, %%mm2         \n\t"                                     \
-                                                                              \
-  "  movq      " #temp ", %%mm3     \n\t"                                     \
-  "  paddw       %%mm0, %%mm5       \n\t" /* mm5 = xC2S6 * irot_input_x */    \
-                                                                              \
-  "  paddw       %%mm2, %%mm5       \n\t" /* Truncated */                     \
-  "  movq        %%mm3, %%mm2       \n\t"                                     \
-                                                                              \
-  "  pmulhw      %[xC6S2], %%mm3    \n\t" /* mm3 = xC6S2 * irot_input_y */    \
-  "  psrlw       $15, %%mm2         \n\t"                                     \
-                                                                              \
-  "  paddw       %%mm2, %%mm3       \n\t" /* Truncated */                     \
-  "  psubsw      %%mm5, %%mm3       \n\t"                                     \
-                                                                              \
-  "  movq        %%mm3," #ip6 "     \n\t"                                     \
-  /* ------------------------------------------------------------------- */   \
-  "  movq        %[xC4S4], %%mm0    \n\t"                                     \
-  "  movq        %%mm1, %%mm2       \n\t"                                     \
-  "  movq        %%mm1, %%mm3       \n\t"                                     \
-                                                                              \
-  "  pmulhw      %%mm0, %%mm1       \n\t" /* mm1 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */ \
-  "  psrlw       $15, %%mm2         \n\t"                                     \
-                                                                              \
-  "  paddw       %%mm3, %%mm1       \n\t" /* mm1 = xC4S4 * ( is12 - is56 ) */ \
-  "  paddw       %%mm2, %%mm1       \n\t" /* Truncate mm1, now it is icommon_product1 */ \
-                                                                              \
-  "  movq        %%mm7, %%mm2       \n\t"                                     \
-  "  movq        %%mm7, %%mm3       \n\t"                                     \
-                                                                              \
-  "  pmulhw      %%mm0, %%mm7       \n\t" /* mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */ \
-  "  psrlw       $15, %%mm2         \n\t"                                     \
-                                                                              \
-  "  paddw       %%mm3, %%mm7       \n\t" /* mm7 = xC4S4 * ( id12 + id56 ) */ \
-  "  paddw       %%mm2, %%mm7       \n\t" /* Truncate mm7, now it is icommon_product2 */ \
-  /* ------------------------------------------------------------------- */   \
-  "  pxor        %%mm0, %%mm0       \n\t" /* Clear mm0 */                     \
-  "  psubsw      %%mm6, %%mm0       \n\t" /* mm0 = - id34 */                  \
-                                                                              \
-  "  psubsw      %%mm7, %%mm0       \n\t" /* mm0 = - ( id34 + idcommon_product2 ) */ \
-  "  paddsw      %%mm6, %%mm6       \n\t"                                     \
-  "  paddsw      %%mm0, %%mm6       \n\t" /* mm6 = id34 - icommon_product2 */ \
-                                                                              \
-  "  psubsw      %%mm1, %%mm4       \n\t" /* mm4 = id07 - icommon_product1 */ \
-  "  paddsw      %%mm1, %%mm1       \n\t"                                     \
-  "  paddsw      %%mm4, %%mm1       \n\t" /* mm1 = id07 + icommon_product1 */ \
-  /* ------------------------------------------------------------------- */   \
-  "  movq        %[xC1S7], %%mm7    \n\t"                                     \
-  "  movq        %%mm1, %%mm2       \n\t"                                     \
-                                                                              \
-  "  movq        %%mm1, %%mm3       \n\t"                                     \
-  "  pmulhw      %%mm7, %%mm1       \n\t" /* mm1 = xC1S7 * irot_input_x - irot_input_x */ \
-                                                                              \
-  "  movq        %[xC7S1], %%mm7    \n\t"                                     \
-  "  psrlw       $15, %%mm2         \n\t"                                     \
-                                                                              \
-  "  paddw       %%mm3, %%mm1       \n\t" /* mm1 = xC1S7 * irot_input_x */    \
-  "  paddw       %%mm2, %%mm1       \n\t" /* Truncated */                     \
-                                                                              \
-  "  pmulhw      %%mm7, %%mm3       \n\t" /* mm3 = xC7S1 * irot_input_x */    \
-  "  paddw       %%mm2, %%mm3       \n\t" /* Truncated */                     \
-                                                                              \
-  "  movq        %%mm0, %%mm5       \n\t"                                     \
-  "  movq        %%mm0, %%mm2       \n\t"                                     \
-                                                                              \
-  "  movq        %[xC1S7], %%mm7    \n\t"                                     \
-  "  pmulhw      %%mm7, %%mm0       \n\t" /* mm0 = xC1S7 * irot_input_y - irot_input_y */ \
-                                                                              \
-  "  movq        %[xC7S1], %%mm7    \n\t"                                     \
-  "  psrlw       $15, %%mm2         \n\t"                                     \
-                                                                              \
-  "  paddw       %%mm5, %%mm0       \n\t" /* mm0 = xC1S7 * irot_input_y */    \
-  "  paddw       %%mm2, %%mm0       \n\t" /* Truncated */                     \
-                                                                              \
-  "  pmulhw      %%mm7, %%mm5       \n\t" /* mm5 = xC7S1 * irot_input_y */    \
-  "  paddw       %%mm2, %%mm5       \n\t" /* Truncated */                     \
-                                                                              \
-  "  psubsw      %%mm5, %%mm1       \n\t" /* mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = ip1 */ \
-  "  paddsw      %%mm0, %%mm3       \n\t" /* mm3 = xC7S1 * irot_input_x - xC1S7 * irot_input_y = ip7 */ \
-                                                                              \
-  "  movq        %%mm1," #ip1 "     \n\t"                                     \
-  "  movq        %%mm3," #ip7 "     \n\t"                                     \
-  /* ------------------------------------------------------------------- */   \
-  "  movq        %[xC3S5], %%mm0    \n\t"                                     \
-  "  movq        %[xC5S3], %%mm1    \n\t"                                     \
-                                                                              \
-  "  movq        %%mm6, %%mm5       \n\t"                                     \
-  "  movq        %%mm6, %%mm7       \n\t"                                     \
-                                                                              \
-  "  movq        %%mm4, %%mm2       \n\t"                                     \
-  "  movq        %%mm4, %%mm3       \n\t"                                     \
-                                                                              \
-  "  pmulhw      %%mm0, %%mm4       \n\t" /* mm4 = xC3S5 * irot_input_x - irot_input_x */ \
-  "  pmulhw      %%mm1, %%mm6       \n\t" /* mm6 = xC5S3 * irot_input_y - irot_input_y */ \
-                                                                              \
-  "  psrlw       $15, %%mm2         \n\t"                                     \
-  "  psrlw       $15, %%mm5         \n\t"                                     \
-                                                                              \
-  "  paddw       %%mm3, %%mm4       \n\t" /* mm4 = xC3S5 * irot_input_x */    \
-  "  paddw       %%mm7, %%mm6       \n\t" /* mm6 = xC5S3 * irot_input_y */    \
-                                                                              \
-  "  paddw       %%mm2, %%mm4       \n\t" /* Truncated */                     \
-  "  paddw       %%mm5, %%mm6       \n\t" /* Truncated */                     \
-                                                                              \
-  "  psubsw      %%mm6, %%mm4       \n\t" /* ip3 */                           \
-  "  movq        %%mm4," #ip3 "     \n\t"                                     \
-                                                                              \
-  "  movq        %%mm3, %%mm4       \n\t"                                     \
-  "  movq        %%mm7, %%mm6       \n\t"                                     \
-                                                                              \
-  "  pmulhw      %%mm1, %%mm3       \n\t" /* mm3 = xC5S3 * irot_input_x - irot_input_x */ \
-  "  pmulhw      %%mm0, %%mm7       \n\t" /* mm7 = xC3S5 * irot_input_y - irot_input_y */ \
-                                                                              \
-  "  paddw       %%mm2, %%mm4       \n\t"                                     \
-  "  paddw       %%mm5, %%mm6       \n\t"                                     \
-                                                                              \
-  "  paddw       %%mm4, %%mm3       \n\t" /* mm3 = xC5S3 * irot_input_x */    \
-  "  paddw       %%mm6, %%mm7       \n\t" /* mm7 = xC3S5 * irot_input_y */    \
-                                                                              \
-  "  paddw       %%mm7, %%mm3       \n\t" /* ip5 */                           \
-  "  movq        %%mm3," #ip5 "     \n\t"
-/* Expands to the assembly text that loads eight quadwords from ip0..ip7,
-   interleaves their 16-bit words with punpck{l,h}wd and punpck{l,h}dq, and
-   stores eight quadwords to op0..op7.
-   op0 and op1 double as scratch: the quadwords loaded from ip0 and ip1 are
-   parked there early and reloaded later, before the final values of op0 and
-   op1 are written.
-   All eight MMX registers mm0..mm7 are overwritten. */
-#define Transpose_mmx(ip0,ip1,ip2,ip3,ip4,ip5,ip6,ip7,                  \
-                      op0,op1,op2,op3,op4,op5,op6,op7)                  \
-  "  movq      " #ip0 ", %%mm0      \n\t" /* mm0 = a0 a1 a2 a3 */       \
-  "  movq      " #ip4 ", %%mm4      \n\t" /* mm4 = e4 e5 e6 e7 */       \
-  "  movq      " #ip1 ", %%mm1      \n\t" /* mm1 = b0 b1 b2 b3 */       \
-  "  movq      " #ip5 ", %%mm5      \n\t" /* mm5 = f4 f5 f6 f7 */       \
-  "  movq      " #ip2 ", %%mm2      \n\t" /* mm2 = c0 c1 c2 c3 */       \
-  "  movq      " #ip6 ", %%mm6      \n\t" /* mm6 = g4 g5 g6 g7 */       \
-  "  movq      " #ip3 ", %%mm3      \n\t" /* mm3 = d0 d1 d2 d3 */       \
-  "  movq        %%mm1," #op1 "     \n\t" /* save  b0 b1 b2 b3 */       \
-  "  movq      " #ip7 ", %%mm7      \n\t" /* mm7 = h0 h1 h2 h3 */       \
-   /* Transpose 2x8 block */                                            \
-  "  movq        %%mm4, %%mm1       \n\t" /* mm1 = e3 e2 e1 e0 */       \
-  "  punpcklwd   %%mm5, %%mm4       \n\t" /* mm4 = f1 e1 f0 e0 */       \
-  "  movq        %%mm0," #op0 "     \n\t" /* save a3 a2 a1 a0  */       \
-  "  punpckhwd   %%mm5, %%mm1       \n\t" /* mm1 = f3 e3 f2 e2 */       \
-  "  movq        %%mm6, %%mm0       \n\t" /* mm0 = g3 g2 g1 g0 */       \
-  "  punpcklwd   %%mm7, %%mm6       \n\t" /* mm6 = h1 g1 h0 g0 */       \
-  "  movq        %%mm4, %%mm5       \n\t" /* mm5 = f1 e1 f0 e0 */       \
-  "  punpckldq   %%mm6, %%mm4       \n\t" /* mm4 = h0 g0 f0 e0 = MM4 */ \
-  "  punpckhdq   %%mm6, %%mm5       \n\t" /* mm5 = h1 g1 f1 e1 = MM5 */ \
-  "  movq        %%mm1, %%mm6       \n\t" /* mm6 = f3 e3 f2 e2 */       \
-  "  movq        %%mm4," #op4 "     \n\t"                               \
-  "  punpckhwd   %%mm7, %%mm0       \n\t" /* mm0 = h3 g3 h2 g2 */       \
-  "  movq        %%mm5," #op5 "     \n\t"                               \
-  "  punpckhdq   %%mm0, %%mm6       \n\t" /* mm6 = h3 g3 f3 e3 = MM7 */ \
-  "  movq      " #op0 ", %%mm4      \n\t" /* mm4 = a3 a2 a1 a0 */       \
-  "  punpckldq   %%mm0, %%mm1       \n\t" /* mm1 = h2 g2 f2 e2 = MM6 */ \
-  "  movq      " #op1 ", %%mm5      \n\t" /* mm5 = b3 b2 b1 b0 */       \
-  "  movq        %%mm4, %%mm0       \n\t" /* mm0 = a3 a2 a1 a0 */       \
-  "  movq        %%mm6," #op7 "     \n\t"                               \
-  "  punpcklwd   %%mm5, %%mm0       \n\t" /* mm0 = b1 a1 b0 a0 */       \
-  "  movq        %%mm1," #op6 "     \n\t"                               \
-  "  punpckhwd   %%mm5, %%mm4       \n\t" /* mm4 = b3 a3 b2 a2 */       \
-  "  movq        %%mm2, %%mm5       \n\t" /* mm5 = c3 c2 c1 c0 */       \
-  "  punpcklwd   %%mm3, %%mm2       \n\t" /* mm2 = d1 c1 d0 c0 */       \
-  "  movq        %%mm0, %%mm1       \n\t" /* mm1 = b1 a1 b0 a0 */       \
-  "  punpckldq   %%mm2, %%mm0       \n\t" /* mm0 = d0 c0 b0 a0 = MM0 */ \
-  "  punpckhdq   %%mm2, %%mm1       \n\t" /* mm1 = d1 c1 b1 a1 = MM1 */ \
-  "  movq        %%mm4, %%mm2       \n\t" /* mm2 = b3 a3 b2 a2 */       \
-  "  movq        %%mm0," #op0 "     \n\t"                               \
-  "  punpckhwd   %%mm3, %%mm5       \n\t" /* mm5 = d3 c3 d2 c2 */       \
-  "  movq        %%mm1," #op1 "     \n\t"                               \
-  "  punpckhdq   %%mm5, %%mm4       \n\t" /* mm4 = d3 c3 b3 a3 = MM3 */ \
-  "  punpckldq   %%mm5, %%mm2       \n\t" /* mm2 = d2 c2 b2 a2 = MM2 */ \
-  "  movq        %%mm4," #op3 "     \n\t"                               \
-  "  movq        %%mm2," #op2 "     \n\t"
-
-
-/* This performs a 2D Forward DCT on an 8x8 block with short
-   coefficients. We try to do the truncation to match the C
-   version.
-   InputData (asm operand %0) is only read, by the first two Transpose_mmx
-   expansions; OutputData (operand %1) receives their output and is then
-   transformed in place by the remaining steps.
-   temp (operand %2) is the scratch location handed to Fdct_mmx.
-   The sequence is: transpose + DCT pass on each half of the input, then
-   transpose + DCT pass on each half of the intermediate result held in
-   OutputData, then emms to clear the MMX state. */
-static void fdct_short__mmx ( ogg_int16_t *InputData, ogg_int16_t *OutputData)
-{
-  ogg_int16_t __attribute__((aligned(8))) temp[8*8];
-
-  __asm__ __volatile__ (
-    "  .balign 16                   \n\t"
-    /*
-     * Input data is an 8x8 block.  To make processing of the data more efficient
-     * we will transpose the block of data to two 4x8 blocks???
-     */
-    Transpose_mmx (  (%0), 16(%0), 32(%0), 48(%0),  8(%0), 24(%0), 40(%0), 56(%0),
-                     (%1), 16(%1), 32(%1), 48(%1),  8(%1), 24(%1), 40(%1), 56(%1))
-    Fdct_mmx      (  (%1), 16(%1), 32(%1), 48(%1),  8(%1), 24(%1), 40(%1), 56(%1), (%2))
-
-    Transpose_mmx (64(%0), 80(%0), 96(%0),112(%0), 72(%0), 88(%0),104(%0),120(%0),
-                   64(%1), 80(%1), 96(%1),112(%1), 72(%1), 88(%1),104(%1),120(%1))
-    Fdct_mmx      (64(%1), 80(%1), 96(%1),112(%1), 72(%1), 88(%1),104(%1),120(%1), (%2))
-
-    Transpose_mmx ( 0(%1), 16(%1), 32(%1), 48(%1), 64(%1), 80(%1), 96(%1),112(%1),
-                    0(%1), 16(%1), 32(%1), 48(%1), 64(%1), 80(%1), 96(%1),112(%1))
-    Fdct_mmx      ( 0(%1), 16(%1), 32(%1), 48(%1), 64(%1), 80(%1), 96(%1),112(%1), (%2))
-
-    Transpose_mmx ( 8(%1), 24(%1), 40(%1), 56(%1), 72(%1), 88(%1),104(%1),120(%1),
-                    8(%1), 24(%1), 40(%1), 56(%1), 72(%1), 88(%1),104(%1),120(%1))
-    Fdct_mmx      ( 8(%1), 24(%1), 40(%1), 56(%1), 72(%1), 88(%1),104(%1),120(%1), (%2))
-
-    "  emms                         \n\t"
-
-    : "+r" (InputData),
-      "+r" (OutputData)
-    : "r" (temp),
-      [xC1S7] "m" (xC1S7),      /* gcc 3.1+ allows named asm parameters */
-      [xC2S6] "m" (xC2S6),
-      [xC3S5] "m" (xC3S5),
-      [xC4S4] "m" (xC4S4),
-      [xC5S3] "m" (xC5S3),
-      [xC6S2] "m" (xC6S2),
-      [xC7S1] "m" (xC7S1)
-    : "memory"
-  );
-}
-
-/* install our implementation in the function table:
-   points funcs->fdct_short at fdct_short__mmx and touches no other entry */
-void dsp_mmx_fdct_init(DspFunctions *funcs)
-{
-  funcs->fdct_short = fdct_short__mmx;
-}
-
-#endif /* USE_ASM */

+ 0 - 27
Engine/lib/libtheora/lib/enc/x86_64/idct_mmx.c

@@ -1,27 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: idct_mmx.c 15397 2008-10-14 02:06:24Z tterribe $
-
- ********************************************************************/
-
-#include "../codec_internal.h"
-
-#if defined(USE_ASM)
-
-/* nothing implemented right now: this initializer has an empty body and
-   leaves funcs untouched */
-void dsp_mmx_idct_init(DspFunctions *funcs)
-{
-}
-
-#endif /* USE_ASM */

+ 0 - 184
Engine/lib/libtheora/lib/enc/x86_64/recon_mmx.c

@@ -1,184 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-  last mod: $Id: recon_mmx.c 15397 2008-10-14 02:06:24Z tterribe $
-
- ********************************************************************/
-
-#include "../codec_internal.h"
-
-#if defined(USE_ASM)
-/* 64-bit unsigned type; the functions below cast their 32-bit stride
-   arguments to it before passing them to the asm blocks as operands. */
-typedef unsigned long long ogg_uint64_t;
-/* 0x80 in every byte.  recon_intra8x8__mmx XORs packed signed bytes with
-   this mask, which its own comment describes as the same as adding 128. */
-static const __attribute__ ((aligned(8),used)) ogg_int64_t V128 = 0x8080808080808080LL;
-
-/* Copies eight rows of eight bytes from src to dest.
-   stride is the distance in bytes between consecutive rows and is applied
-   to both src and dest.
-   Rows are moved four at a time through mm0..mm3; rdi holds 3*stride.
-   No emms is issued here.
-   The asm advances both pointers by 4*stride, so both are declared as
-   read-write operands: GCC requires that input-only operands are never
-   modified by the asm. */
-static void copy8x8__mmx (unsigned char *src,
-                          unsigned char *dest,
-                          ogg_uint32_t stride)
-{
-  __asm__ __volatile__ (
-    "  .balign 16                      \n\t"
-
-    "  lea         (%2, %2, 2), %%rdi  \n\t"
-
-    "  movq        (%1), %%mm0         \n\t"
-    "  movq        (%1, %2), %%mm1     \n\t"
-    "  movq        (%1, %2, 2), %%mm2  \n\t"
-    "  movq        (%1, %%rdi), %%mm3  \n\t"
-
-    "  lea         (%1, %2, 4), %1     \n\t"
-
-    "  movq        %%mm0, (%0)         \n\t"
-    "  movq        %%mm1, (%0, %2)     \n\t"
-    "  movq        %%mm2, (%0, %2, 2)  \n\t"
-    "  movq        %%mm3, (%0, %%rdi)  \n\t"
-
-    "  lea         (%0, %2, 4), %0     \n\t"
-
-    "  movq        (%1), %%mm0         \n\t"
-    "  movq        (%1, %2), %%mm1     \n\t"
-    "  movq        (%1, %2, 2), %%mm2  \n\t"
-    "  movq        (%1, %%rdi), %%mm3  \n\t"
-
-    "  movq        %%mm0, (%0)         \n\t"
-    "  movq        %%mm1, (%0, %2)     \n\t"
-    "  movq        %%mm2, (%0, %2, 2)  \n\t"
-    "  movq        %%mm3, (%0, %%rdi)  \n\t"
-      : "+a" (dest),
-        "+c" (src)                     /* %1 is advanced by the lea above */
-      : "d" ((ogg_uint64_t)stride)
-      : "memory", "rdi"
-  );
-}
-
-/* Converts an 8x8 block of 16-bit values at ChangePtr into bytes at
-   ReconPtr.
-   Each iteration packs eight 16-bit values (16 bytes of input) to bytes with
-   signed saturation, XORs them with 0x80 per byte, and stores one 8-byte
-   row; ReconPtr then advances by LineStep bytes.
-   The loop stops once ChangePtr reaches its starting value plus 128 bytes,
-   i.e. after eight rows.
-   No emms is issued here.
-   ChangePtr is advanced inside the asm, so it is declared as a read-write
-   operand: GCC requires that input-only operands are never modified. */
-static void recon_intra8x8__mmx (unsigned char *ReconPtr, ogg_int16_t *ChangePtr,
-                                 ogg_uint32_t LineStep)
-{
-  __asm__ __volatile__ (
-    "  .balign 16                      \n\t"
-
-    "  movq        %[V128], %%mm0      \n\t" /* Set mm0 to 0x8080808080808080 */
-
-    "  lea         128(%1), %%rdi      \n\t" /* Endpoint in input buffer */
-    "1:                                \n\t"
-    "  movq         (%1), %%mm2        \n\t" /* First four input values */
-
-    "  packsswb    8(%1), %%mm2        \n\t" /* pack with next(high) four values */
-    "  por         %%mm0, %%mm0        \n\t" /* mm0 OR mm0: leaves mm0 unchanged */
-    "  pxor        %%mm0, %%mm2        \n\t" /* Convert result to unsigned (same as add 128) */
-    "  lea         16(%1), %1          \n\t" /* Step source buffer */
-    "  cmp         %%rdi, %1           \n\t" /* are we done */
-
-    "  movq        %%mm2, (%0)         \n\t" /* store results */
-
-    "  lea         (%0, %2), %0        \n\t" /* Step output buffer */
-    "  jc          1b                  \n\t" /* Loop back if we are not done */
-      : "+r" (ReconPtr),
-        "+r" (ChangePtr)               /* %1 is stepped by the loop */
-      : "r" ((ogg_uint64_t)LineStep),
-        [V128] "m" (V128)
-      : "memory", "rdi"
-  );
-}
-
-/* Reconstructs an 8x8 block as reference pixels plus 16-bit changes.
-   Each iteration widens eight bytes from RefPtr to 16 bits, adds the eight
-   16-bit values at ChangePtr with signed saturation, packs the sums to
-   bytes with unsigned saturation, and stores one 8-byte row at ReconPtr.
-   RefPtr and ReconPtr both advance by LineStep bytes per row; ChangePtr
-   advances by 16 bytes.  The loop stops after 128 bytes of changes, i.e.
-   eight rows.
-   No emms is issued here.
-   ChangePtr and RefPtr are advanced inside the asm, so they are declared as
-   read-write operands: GCC requires that input-only operands are never
-   modified. */
-static void recon_inter8x8__mmx (unsigned char *ReconPtr, unsigned char *RefPtr,
-                                 ogg_int16_t *ChangePtr, ogg_uint32_t LineStep)
-{
-  __asm__ __volatile__ (
-    "  .balign 16                      \n\t"
-
-    "  pxor        %%mm0, %%mm0        \n\t"
-    "  lea         128(%1), %%rdi      \n\t"
-
-    "1:                                \n\t"
-    "  movq        (%2), %%mm2         \n\t" /* (+3 misaligned) 8 reference pixels */
-
-    "  movq        (%1), %%mm4         \n\t" /* first 4 changes */
-    "  movq        %%mm2, %%mm3        \n\t"
-    "  movq        8(%1), %%mm5        \n\t" /* last 4 changes */
-    "  punpcklbw   %%mm0, %%mm2        \n\t" /* turn first 4 refs into positive 16-bit #s */
-    "  paddsw      %%mm4, %%mm2        \n\t" /* add in first 4 changes */
-    "  punpckhbw   %%mm0, %%mm3        \n\t" /* turn last 4 refs into positive 16-bit #s */
-    "  paddsw      %%mm5, %%mm3        \n\t" /* add in last 4 changes */
-    "  add         %3, %2              \n\t" /* next row of reference pixels */
-    "  packuswb    %%mm3, %%mm2        \n\t" /* pack result to unsigned 8-bit values */
-    "  lea         16(%1), %1          \n\t" /* next row of changes */
-    "  cmp         %%rdi, %1           \n\t" /* are we done? */
-
-    "  movq        %%mm2, (%0)         \n\t" /* store result */
-
-    "  lea         (%0, %3), %0        \n\t" /* next row of output */
-    "  jc          1b                  \n\t"
-      : "+r" (ReconPtr),
-        "+r" (ChangePtr),              /* %1 is stepped by the loop */
-        "+r" (RefPtr)                  /* %2 is stepped by the loop */
-      : "r" ((ogg_uint64_t)LineStep)
-      : "memory", "rdi"
-  );
-}
-
-/* Reconstructs an 8x8 block from the average of two references plus
-   16-bit changes.
-   Each iteration widens eight bytes from RefPtr1 and eight from RefPtr2 to
-   16 bits, adds them and shifts right by one, adds the eight 16-bit values
-   at ChangePtr (paddw, no saturation), packs to bytes with unsigned
-   saturation, and stores one 8-byte row at ReconPtr.
-   RefPtr1, RefPtr2 and ReconPtr advance by LineStep bytes per row;
-   ChangePtr advances by 16 bytes.  The loop stops after 128 bytes of
-   changes, i.e. eight rows.
-   No emms is issued here.
-   ChangePtr, RefPtr1 and RefPtr2 are advanced inside the asm, so they are
-   declared as read-write operands: GCC requires that input-only operands
-   are never modified. */
-static void recon_inter8x8_half__mmx (unsigned char *ReconPtr, unsigned char *RefPtr1,
-                                      unsigned char *RefPtr2, ogg_int16_t *ChangePtr,
-                                      ogg_uint32_t LineStep)
-{
-  __asm__ __volatile__ (
-    "  .balign 16                      \n\t"
-
-    "  pxor        %%mm0, %%mm0        \n\t"
-    "  lea         128(%1), %%rdi      \n\t"
-
-    "1:                                \n\t"
-    "  movq        (%2), %%mm2         \n\t" /* (+3 misaligned) 8 reference pixels */
-    "  movq        (%3), %%mm4         \n\t" /* (+3 misaligned) 8 reference pixels */
-
-    "  movq        %%mm2, %%mm3        \n\t"
-    "  punpcklbw   %%mm0, %%mm2        \n\t" /* mm2 = start ref1 as positive 16-bit #s */
-    "  movq        %%mm4, %%mm5        \n\t"
-    "  movq        (%1), %%mm6         \n\t" /* first 4 changes */
-    "  punpckhbw   %%mm0, %%mm3        \n\t" /* mm3 = end ref1 as positive 16-bit #s */
-    "  movq        8(%1), %%mm7        \n\t" /* last 4 changes */
-    "  punpcklbw   %%mm0, %%mm4        \n\t" /* mm4 = start ref2 as positive 16-bit #s */
-    "  punpckhbw   %%mm0, %%mm5        \n\t" /* mm5 = end ref2 as positive 16-bit #s */
-    "  paddw       %%mm4, %%mm2        \n\t" /* mm2 = start (ref1 + ref2) */
-    "  paddw       %%mm5, %%mm3        \n\t" /* mm3 = end (ref1 + ref2) */
-    "  psrlw       $1, %%mm2           \n\t" /* mm2 = start (ref1 + ref2)/2 */
-    "  psrlw       $1, %%mm3           \n\t" /* mm3 = end (ref1 + ref2)/2 */
-    "  paddw       %%mm6, %%mm2        \n\t" /* add changes to start */
-    "  paddw       %%mm7, %%mm3        \n\t" /* add changes to end */
-    "  lea         16(%1), %1          \n\t" /* next row of changes */
-    "  packuswb    %%mm3, %%mm2        \n\t" /* pack start|end to unsigned 8-bit */
-    "  add         %4, %2              \n\t" /* next row of reference pixels */
-    "  add         %4, %3              \n\t" /* next row of reference pixels */
-    "  movq        %%mm2, (%0)         \n\t" /* store result */
-    "  add         %4, %0              \n\t" /* next row of output */
-    "  cmp         %%rdi, %1           \n\t" /* are we done? */
-    "  jc          1b                  \n\t"
-      : "+r" (ReconPtr),
-        "+r" (ChangePtr),              /* %1 is stepped by the loop */
-        "+r" (RefPtr1),                /* %2 is stepped by the loop */
-        "+r" (RefPtr2)                 /* %3 is stepped by the loop */
-      : "r" ((ogg_uint64_t)LineStep)
-      : "memory", "rdi"
-  );
-}
-
-/* Points the four reconstruction entries of the DSP function table at the
-   MMX routines defined above.  Each entry is set independently. */
-void dsp_mmx_recon_init(DspFunctions *funcs)
-{
-  funcs->recon_inter8x8_half = recon_inter8x8_half__mmx;
-  funcs->recon_inter8x8 = recon_inter8x8__mmx;
-  funcs->recon_intra8x8 = recon_intra8x8__mmx;
-  funcs->copy8x8 = copy8x8__mmx;
-}
-
-#endif /* USE_ASM */

+ 168 - 0
Engine/lib/libtheora/lib/encapiwrapper.c

@@ -0,0 +1,168 @@
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "apiwrapper.h"
+#include "encint.h"
+#include "theora/theoraenc.h"
+
+
+
+/*Frees the encoder context held by an API wrapper, if there is one, and then
+   zeroes every field of the wrapper.
+  _api: The wrapper to clear.*/
+static void th_enc_api_clear(th_api_wrapper *_api){
+  oc_enc_ctx *enc;
+  enc=_api->encode;
+  if(enc!=NULL){
+    th_encode_free(enc);
+  }
+  memset(_api,0,sizeof(*_api));
+}
+
+/*Clear entry point for the legacy encoder state: clears the attached
+   theora_info (when one is present) and then zeroes the whole theora_state.
+  _te: The legacy encoder state to clear.*/
+static void theora_encode_clear(theora_state *_te){
+  theora_info *info;
+  info=_te->i;
+  if(info!=NULL){
+    theora_info_clear(info);
+  }
+  memset(_te,0,sizeof(*_te));
+}
+
+/*Control entry point for the legacy encoder state: forwards the request to
+   th_encode_ctl() on the wrapped encoder context.
+  Return: Whatever th_encode_ctl() returns.*/
+static int theora_encode_control(theora_state *_te,int _req,
+ void *_buf,size_t _buf_sz){
+  th_api_wrapper *api;
+  api=(th_api_wrapper *)_te->i->codec_setup;
+  return th_encode_ctl(api->encode,_req,_buf,_buf_sz);
+}
+
+/*Granule-frame entry point for the legacy encoder state: forwards _gp to
+   th_granule_frame() on the wrapped encoder context.*/
+static ogg_int64_t theora_encode_granule_frame(theora_state *_te,
+ ogg_int64_t _gp){
+  th_api_wrapper *api;
+  api=(th_api_wrapper *)_te->i->codec_setup;
+  return th_granule_frame(api->encode,_gp);
+}
+
+/*Granule-time entry point for the legacy encoder state: forwards _gp to
+   th_granule_time() on the wrapped encoder context.*/
+static double theora_encode_granule_time(theora_state *_te,ogg_int64_t _gp){
+  th_api_wrapper *api;
+  api=(th_api_wrapper *)_te->i->codec_setup;
+  return th_granule_time(api->encode,_gp);
+}
+
+/*Table of the encoder's legacy entry points, listed in the order clear,
+   control, granule frame, granule time.
+  theora_encode_init() stores the address of this table in
+   theora_state.internal_encode.*/
+static const oc_state_dispatch_vtable OC_ENC_DISPATCH_VTBL={
+  (oc_state_clear_func)theora_encode_clear,
+  (oc_state_control_func)theora_encode_control,
+  (oc_state_granule_frame_func)theora_encode_granule_frame,
+  (oc_state_granule_time_func)theora_encode_granule_time,
+};
+
+/*Initializes a legacy theora_state for encoding.
+  _te: The state to initialize.
+  _ci: The encoding parameters; a private copy is made.
+  Return: 0 on success, TH_EFAULT if the wrapper could not be allocated, or
+   OC_EINVAL if th_encode_alloc() rejected the parameters.*/
+int theora_encode_init(theora_state *_te,theora_info *_ci){
+  th_api_info  *wrapper;
+  th_info       ti;
+  ogg_uint32_t  kf_force;
+  /*The API wrapper and our theora_info copy live in a single malloc'd block,
+     so that freeing the wrapper releases the info struct as well.
+    That way neither theora_info_clear() nor theora_clear() has to work out
+     whether the info struct needs to be freed separately.*/
+  wrapper=(th_api_info *)_ogg_malloc(sizeof(*wrapper));
+  if(wrapper==NULL){
+    return TH_EFAULT;
+  }
+  /*Keep a private copy of the caller's info struct; its lifetime must not
+     depend on the one that was passed in.*/
+  wrapper->info=*_ci;
+  oc_theora_info2th_info(&ti,_ci);
+  wrapper->api.encode=th_encode_alloc(&ti);
+  if(wrapper->api.encode==NULL){
+    _ogg_free(wrapper);
+    return OC_EINVAL;
+  }
+  wrapper->api.clear=(oc_setup_clear_func)th_enc_api_clear;
+  /*Provide entry points for ABI compatibility with old decoder shared libs.*/
+  _te->internal_encode=(void *)&OC_ENC_DISPATCH_VTBL;
+  _te->internal_decode=NULL;
+  _te->granulepos=0;
+  _te->i=&wrapper->info;
+  _te->i->codec_setup=&wrapper->api;
+  /*Set the precise requested keyframe frequency: the forced value when
+     automatic keyframing is on, the plain frequency otherwise.*/
+  if(_ci->keyframe_auto_p){
+    kf_force=_ci->keyframe_frequency_force;
+  }
+  else{
+    kf_force=_ci->keyframe_frequency;
+  }
+  th_encode_ctl(wrapper->api.encode,
+   TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE,&kf_force,sizeof(kf_force));
+  /*TODO: Additional codec setup using the extra fields in theora_info.*/
+  return 0;
+}
+
+/*Submits one frame through the legacy API.
+  The yuv_buffer is repackaged as a th_ycbcr_buffer and handed to
+   th_encode_ycbcr_in(); on success the state's granulepos is updated from the
+   encoder.
+  Return: The value returned by th_encode_ycbcr_in().*/
+int theora_encode_YUVin(theora_state *_te,yuv_buffer *_yuv){
+  th_api_wrapper  *api;
+  th_ycbcr_buffer  planes;
+  int              pli;
+  int              status;
+  api=(th_api_wrapper *)_te->i->codec_setup;
+  /*Plane 0 takes the y_* fields.*/
+  planes[0].width=_yuv->y_width;
+  planes[0].height=_yuv->y_height;
+  planes[0].stride=_yuv->y_stride;
+  planes[0].data=_yuv->y;
+  /*Planes 1 and 2 share the uv_* dimensions and stride.*/
+  for(pli=1;pli<3;pli++){
+    planes[pli].width=_yuv->uv_width;
+    planes[pli].height=_yuv->uv_height;
+    planes[pli].stride=_yuv->uv_stride;
+  }
+  planes[1].data=_yuv->u;
+  planes[2].data=_yuv->v;
+  status=th_encode_ycbcr_in(api->encode,planes);
+  if(status>=0){
+    _te->granulepos=api->encode->state.granpos;
+  }
+  return status;
+}
+
+/*Legacy wrapper around th_encode_packetout() for the encoder context attached
+   to _te.
+  Return: Whatever th_encode_packetout() returns.*/
+int theora_encode_packetout(theora_state *_te,int _last_p,ogg_packet *_op){
+  return th_encode_packetout(
+   ((th_api_wrapper *)_te->i->codec_setup)->encode,_last_p,_op);
+}
+
+/*Produces the info header packet through the legacy API.
+  Return: 0 on success, TH_EINVAL if encoding has already started, or the
+   negative value returned by th_encode_flushheader().*/
+int theora_encode_header(theora_state *_te,ogg_packet *_op){
+  oc_enc_ctx *enc;
+  int         status;
+  enc=((th_api_wrapper *)_te->i->codec_setup)->encode;
+  /*Refuse once encoding has started.*/
+  if(enc->packet_state>OC_PACKET_EMPTY)return TH_EINVAL;
+  if(enc->state.granpos!=0)return TH_EINVAL;
+  /*Rewind the packet state so the next header flushed is the info packet.*/
+  enc->packet_state=OC_PACKET_INFO_HDR;
+  status=th_encode_flushheader(enc,NULL,_op);
+  if(status<0)return status;
+  return 0;
+}
+
+/*Produces the comment header packet through the legacy API.
+  On success _op->packet points at a freshly malloc'd copy of the packet
+   data; on allocation failure it is set to NULL.
+  Return: 0 on success, TH_EFAULT if the copy could not be allocated, or the
+   negative value returned by oc_state_flushheader().*/
+int theora_encode_comment(theora_comment *_tc,ogg_packet *_op){
+  oggpack_buffer  opb;
+  void           *copy;
+  int             hdr_state;
+  int             status;
+  hdr_state=OC_PACKET_COMMENT_HDR;
+  oggpackB_writeinit(&opb);
+  status=oc_state_flushheader(NULL,&hdr_state,&opb,NULL,NULL,
+   th_version_string(),(th_comment *)_tc,_op);
+  if(status>=0){
+    /*opb dies when this function returns, so the packet contents have to be
+       copied out of it.
+      Presumably the application knows it is supposed to free this.
+      This part works nothing like the Vorbis API, and the documentation on it
+       has been wrong for some time, claiming libtheora owned the memory.*/
+    copy=_ogg_malloc(_op->bytes);
+    if(copy!=NULL){
+      memcpy(copy,_op->packet,_op->bytes);
+      _op->packet=copy;
+      status=0;
+    }
+    else{
+      _op->packet=NULL;
+      status=TH_EFAULT;
+    }
+  }
+  oggpack_writeclear(&opb);
+  return status;
+}
+
+/*Produces the setup header packet through the legacy API.
+  Return: 0 on success, TH_EINVAL if encoding has already started, or the
+   negative value returned by th_encode_flushheader().*/
+int theora_encode_tables(theora_state *_te,ogg_packet *_op){
+  oc_enc_ctx *enc;
+  int         status;
+  enc=((th_api_wrapper *)_te->i->codec_setup)->encode;
+  /*Refuse once encoding has started.*/
+  if(enc->packet_state>OC_PACKET_EMPTY)return TH_EINVAL;
+  if(enc->state.granpos!=0)return TH_EINVAL;
+  /*Move the packet state so the next header flushed is the setup packet.*/
+  enc->packet_state=OC_PACKET_SETUP_HDR;
+  status=th_encode_flushheader(enc,NULL,_op);
+  if(status<0)return status;
+  return 0;
+}

+ 388 - 0
Engine/lib/libtheora/lib/encfrag.c

@@ -0,0 +1,388 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: encfrag.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+#include <stdlib.h>
+#include <string.h>
+#include "encint.h"
+
+
+/*Dispatches to the frag_sub routine installed in the encoder's vtable.*/
+void oc_enc_frag_sub(const oc_enc_ctx *_enc,ogg_int16_t _diff[64],
+ const unsigned char *_src,const unsigned char *_ref,int _ystride){
+  _enc->opt_vtable.frag_sub(_diff,_src,_ref,_ystride);
+}
+
+/*Stores the per-pixel difference _src-_ref of two 8x8 blocks.
+  _diff:    Receives the 64 differences, one row of 8 after another.
+  _src:     Top-left pixel of the source block.
+  _ref:     Top-left pixel of the reference block.
+  _ystride: Offset between consecutive rows in both _src and _ref.*/
+void oc_enc_frag_sub_c(ogg_int16_t _diff[64],const unsigned char *_src,
+ const unsigned char *_ref,int _ystride){
+  ogg_int16_t *out;
+  int          y;
+  int          x;
+  out=_diff;
+  for(y=0;y<8;y++){
+    for(x=0;x<8;x++)*out++=(ogg_int16_t)(_src[x]-_ref[x]);
+    _src+=_ystride;
+    _ref+=_ystride;
+  }
+}
+
+/*Dispatches to the frag_sub_128 routine installed in the encoder's vtable.*/
+void oc_enc_frag_sub_128(const oc_enc_ctx *_enc,ogg_int16_t _diff[64],
+ const unsigned char *_src,int _ystride){
+  _enc->opt_vtable.frag_sub_128(_diff,_src,_ystride);
+}
+
+/*Stores each pixel of an 8x8 block minus 128.
+  _diff:    Receives the 64 results, one row of 8 after another.
+  _src:     Top-left pixel of the source block.
+  _ystride: Offset between consecutive rows of _src.*/
+void oc_enc_frag_sub_128_c(ogg_int16_t *_diff,
+ const unsigned char *_src,int _ystride){
+  ogg_int16_t *out;
+  int          y;
+  int          x;
+  out=_diff;
+  for(y=0;y<8;y++){
+    for(x=0;x<8;x++)*out++=(ogg_int16_t)(_src[x]-128);
+    _src+=_ystride;
+  }
+}
+
+/*Dispatches to the frag_sad routine installed in the encoder's vtable.*/
+unsigned oc_enc_frag_sad(const oc_enc_ctx *_enc,const unsigned char *_x,
+ const unsigned char *_y,int _ystride){
+  return _enc->opt_vtable.frag_sad(_x,_y,_ystride);
+}
+
+/*Sum of absolute differences between two 8x8 blocks.
+  _src:     Top-left pixel of the source block.
+  _ref:     Top-left pixel of the reference block.
+  _ystride: Offset between consecutive rows in both blocks.
+  Return: The sum of |_src-_ref| over all 64 pixels.*/
+unsigned oc_enc_frag_sad_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride){
+  unsigned total;
+  int      row;
+  int      col;
+  total=0;
+  for(row=0;row<8;row++){
+    for(col=0;col<8;col++){
+      total+=abs(_src[col]-_ref[col]);
+    }
+    _src+=_ystride;
+    _ref+=_ystride;
+  }
+  return total;
+}
+
+/*Dispatches to the frag_sad_thresh routine installed in the encoder's
+   vtable.*/
+unsigned oc_enc_frag_sad_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride,
+ unsigned _thresh){
+  return _enc->opt_vtable.frag_sad_thresh(_src,_ref,_ystride,_thresh);
+}
+
+/*Sum of absolute differences between two 8x8 blocks, with early termination.
+  Rows are accumulated one at a time; as soon as the running total exceeds
+   _thresh the remaining rows are skipped.
+  Return: The total accumulated so far (a partial sum if it stopped early).*/
+unsigned oc_enc_frag_sad_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh){
+  unsigned total;
+  int      row;
+  int      col;
+  total=0;
+  /*The total is 0 on entry, so the first row is always processed.*/
+  for(row=0;row<8&&total<=_thresh;row++){
+    for(col=0;col<8;col++){
+      total+=abs(_src[col]-_ref[col]);
+    }
+    _src+=_ystride;
+    _ref+=_ystride;
+  }
+  return total;
+}
+
+/*Dispatches to the frag_sad2_thresh routine installed in the encoder's
+   vtable.*/
+unsigned oc_enc_frag_sad2_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref1,
+ const unsigned char *_ref2,int _ystride,unsigned _thresh){
+  return _enc->opt_vtable.frag_sad2_thresh(_src,_ref1,_ref2,_ystride,_thresh);
+}
+
+/*Sum of absolute differences between an 8x8 block and the average of two
+   reference blocks, with early termination.
+  Each reference pixel is (_ref1+_ref2)>>1.
+  Rows are accumulated one at a time; as soon as the running total exceeds
+   _thresh the remaining rows are skipped.
+  Return: The total accumulated so far (a partial sum if it stopped early).*/
+unsigned oc_enc_frag_sad2_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh){
+  unsigned total;
+  int      row;
+  int      col;
+  total=0;
+  /*The total is 0 on entry, so the first row is always processed.*/
+  for(row=0;row<8&&total<=_thresh;row++){
+    for(col=0;col<8;col++){
+      int avg;
+      avg=(_ref1[col]+_ref2[col])>>1;
+      total+=abs(_src[col]-avg);
+    }
+    _src+=_ystride;
+    _ref1+=_ystride;
+    _ref2+=_ystride;
+  }
+  return total;
+}
+
+/*Applies an 8-point Hadamard transform to each row of the difference between
+   two 8x8 blocks.
+  The results are written transposed: output k of input row i goes to
+   _buf[k*8+i].
+  _buf:     Receives the 64 transformed values.
+  _src:     Top-left pixel of the source block.
+  _ref:     Top-left pixel of the reference block.
+  _ystride: Offset between consecutive rows in both blocks.*/
+static void oc_diff_hadamard(ogg_int16_t _buf[64],const unsigned char *_src,
+ const unsigned char *_ref,int _ystride){
+  int row;
+  for(row=0;row<8;row++){
+    int d[8];
+    int s[8];
+    int u[8];
+    int k;
+    /*Pixel differences for this row.*/
+    for(k=0;k<8;k++)d[k]=_src[k]-_ref[k];
+    /*Hadamard stage 1: butterflies between elements 4 apart.*/
+    for(k=0;k<4;k++){
+      s[k]=d[k]+d[k+4];
+      s[k+4]=d[k]-d[k+4];
+    }
+    /*Hadamard stage 2: butterflies between elements 2 apart.*/
+    for(k=0;k<8;k+=4){
+      u[k]=s[k]+s[k+2];
+      u[k+2]=s[k]-s[k+2];
+      u[k+1]=s[k+1]+s[k+3];
+      u[k+3]=s[k+1]-s[k+3];
+    }
+    /*Hadamard stage 3: butterflies between neighbors, stored transposed.*/
+    for(k=0;k<8;k+=2){
+      _buf[k*8+row]=(ogg_int16_t)(u[k]+u[k+1]);
+      _buf[(k+1)*8+row]=(ogg_int16_t)(u[k]-u[k+1]);
+    }
+    _src+=_ystride;
+    _ref+=_ystride;
+  }
+}
+
+/*Applies an 8-point Hadamard transform to each row of the difference between
+   an 8x8 block and the average of two reference blocks.
+  Each reference pixel is (_ref1+_ref2)>>1.
+  The results are written transposed: output k of input row i goes to
+   _buf[k*8+i].
+  _buf:     Receives the 64 transformed values.
+  _src:     Top-left pixel of the source block.
+  _ref1:    Top-left pixel of the first reference block.
+  _ref2:    Top-left pixel of the second reference block.
+  _ystride: Offset between consecutive rows in all three blocks.*/
+static void oc_diff_hadamard2(ogg_int16_t _buf[64],const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride){
+  int row;
+  for(row=0;row<8;row++){
+    int d[8];
+    int s[8];
+    int u[8];
+    int k;
+    /*Differences against the averaged reference for this row.*/
+    for(k=0;k<8;k++)d[k]=_src[k]-((_ref1[k]+_ref2[k])>>1);
+    /*Hadamard stage 1: butterflies between elements 4 apart.*/
+    for(k=0;k<4;k++){
+      s[k]=d[k]+d[k+4];
+      s[k+4]=d[k]-d[k+4];
+    }
+    /*Hadamard stage 2: butterflies between elements 2 apart.*/
+    for(k=0;k<8;k+=4){
+      u[k]=s[k]+s[k+2];
+      u[k+2]=s[k]-s[k+2];
+      u[k+1]=s[k+1]+s[k+3];
+      u[k+3]=s[k+1]-s[k+3];
+    }
+    /*Hadamard stage 3: butterflies between neighbors, stored transposed.*/
+    for(k=0;k<8;k+=2){
+      _buf[k*8+row]=(ogg_int16_t)(u[k]+u[k+1]);
+      _buf[(k+1)*8+row]=(ogg_int16_t)(u[k]-u[k+1]);
+    }
+    _src+=_ystride;
+    _ref1+=_ystride;
+    _ref2+=_ystride;
+  }
+}
+
+/*Applies an 8-point Hadamard transform to each row of an 8x8 block of pixels.
+  The results are written transposed: output k of input row i goes to
+   _buf[k*8+i].
+  _buf:     Receives the 64 transformed values.
+  _src:     Top-left pixel of the source block.
+  _ystride: Offset between consecutive rows of _src.*/
+static void oc_intra_hadamard(ogg_int16_t _buf[64],const unsigned char *_src,
+ int _ystride){
+  int row;
+  for(row=0;row<8;row++){
+    int s[8];
+    int u[8];
+    int k;
+    /*Hadamard stage 1: butterflies between elements 4 apart.*/
+    for(k=0;k<4;k++){
+      s[k]=_src[k]+_src[k+4];
+      s[k+4]=_src[k]-_src[k+4];
+    }
+    /*Hadamard stage 2: butterflies between elements 2 apart.*/
+    for(k=0;k<8;k+=4){
+      u[k]=s[k]+s[k+2];
+      u[k+2]=s[k]-s[k+2];
+      u[k+1]=s[k+1]+s[k+3];
+      u[k+3]=s[k+1]-s[k+3];
+    }
+    /*Hadamard stage 3: butterflies between neighbors, stored transposed.*/
+    for(k=0;k<8;k+=2){
+      _buf[k*8+row]=(ogg_int16_t)(u[k]+u[k+1]);
+      _buf[(k+1)*8+row]=(ogg_int16_t)(u[k]-u[k+1]);
+    }
+    _src+=_ystride;
+  }
+}
+
+/*Applies an 8-point Hadamard transform to each group of 8 consecutive values
+   in _buf and sums the absolute values of the outputs.
+  After each group the running total is compared against _thresh, and the
+   remaining groups are skipped once it has been exceeded.
+  _buf:    The 64 input values.
+  _thresh: Early-termination threshold.
+  Return: The total accumulated so far (a partial sum if it stopped early).*/
+unsigned oc_hadamard_sad_thresh(const ogg_int16_t _buf[64],unsigned _thresh){
+  const ogg_int16_t *row;
+  unsigned           total;
+  int                i;
+  total=0;
+  row=_buf;
+  for(i=0;i<8;i++){
+    int s[8];
+    int u[8];
+    int row_sum;
+    int k;
+    /*Hadamard stage 1: butterflies between elements 4 apart.*/
+    for(k=0;k<4;k++){
+      s[k]=row[k]+row[k+4];
+      s[k+4]=row[k]-row[k+4];
+    }
+    /*Hadamard stage 2: butterflies between elements 2 apart.*/
+    for(k=0;k<8;k+=4){
+      u[k]=s[k]+s[k+2];
+      u[k+2]=s[k]-s[k+2];
+      u[k+1]=s[k+1]+s[k+3];
+      u[k+3]=s[k+1]-s[k+3];
+    }
+    /*Hadamard stage 3: only the magnitudes of the outputs are needed.*/
+    row_sum=0;
+    for(k=0;k<8;k+=2){
+      row_sum+=abs(u[k]+u[k+1]);
+      row_sum+=abs(u[k]-u[k+1]);
+    }
+    total+=row_sum;
+    if(total>_thresh)break;
+    row+=8;
+  }
+  return total;
+}
+
+/*Dispatches to the frag_satd_thresh routine installed in the encoder's
+   vtable.*/
+unsigned oc_enc_frag_satd_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride,
+ unsigned _thresh){
+  return _enc->opt_vtable.frag_satd_thresh(_src,_ref,_ystride,_thresh);
+}
+
+/*Hadamard-transformed absolute difference between two 8x8 blocks:
+   oc_diff_hadamard() transforms the rows of the difference, and
+   oc_hadamard_sad_thresh() completes the transform and sums the magnitudes,
+   stopping early once _thresh is exceeded.*/
+unsigned oc_enc_frag_satd_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh){
+  ogg_int16_t coeffs[64];
+  unsigned    satd;
+  oc_diff_hadamard(coeffs,_src,_ref,_ystride);
+  satd=oc_hadamard_sad_thresh(coeffs,_thresh);
+  return satd;
+}
+
+/*Dispatches to the frag_satd2_thresh routine installed in the encoder's
+   vtable.*/
+unsigned oc_enc_frag_satd2_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref1,
+ const unsigned char *_ref2,int _ystride,unsigned _thresh){
+  return _enc->opt_vtable.frag_satd2_thresh(_src,_ref1,_ref2,_ystride,_thresh);
+}
+
+/*Hadamard-transformed absolute difference between an 8x8 block and the
+   average of two reference blocks: oc_diff_hadamard2() transforms the rows of
+   the difference, and oc_hadamard_sad_thresh() completes the transform and
+   sums the magnitudes, stopping early once _thresh is exceeded.*/
+unsigned oc_enc_frag_satd2_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh){
+  ogg_int16_t coeffs[64];
+  unsigned    satd;
+  oc_diff_hadamard2(coeffs,_src,_ref1,_ref2,_ystride);
+  satd=oc_hadamard_sad_thresh(coeffs,_thresh);
+  return satd;
+}
+
+/*Dispatches to the frag_intra_satd routine installed in the encoder's
+   vtable.*/
+unsigned oc_enc_frag_intra_satd(const oc_enc_ctx *_enc,
+ const unsigned char *_src,int _ystride){
+  return _enc->opt_vtable.frag_intra_satd(_src,_ystride);
+}
+
+/*Hadamard-transformed absolute sum of an 8x8 block of pixels, excluding the
+   DC term.
+  The full sum is computed with no early termination (threshold UINT_MAX),
+   and then the magnitude of the sum of the first 8 row-transformed values is
+   subtracted; that sum is the first output of the second transform pass.*/
+unsigned oc_enc_frag_intra_satd_c(const unsigned char *_src,int _ystride){
+  ogg_int16_t coeffs[64];
+  unsigned    satd;
+  int         dc;
+  int         k;
+  oc_intra_hadamard(coeffs,_src,_ystride);
+  satd=oc_hadamard_sad_thresh(coeffs,UINT_MAX);
+  dc=0;
+  for(k=0;k<8;k++)dc+=coeffs[k];
+  return satd-abs(dc);
+}
+
+/*Dispatches to the frag_copy2 routine installed in the encoder's vtable.*/
+void oc_enc_frag_copy2(const oc_enc_ctx *_enc,unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride){
+  _enc->opt_vtable.frag_copy2(_dst,_src1,_src2,_ystride);
+}
+
+/*Writes the average of two 8x8 blocks, (_src1+_src2)>>1 per pixel, to _dst.
+  _dst:     Top-left pixel of the destination block.
+  _src1:    Top-left pixel of the first source block.
+  _src2:    Top-left pixel of the second source block.
+  _ystride: Offset between consecutive rows in all three blocks.*/
+void oc_enc_frag_copy2_c(unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride){
+  int row;
+  int col;
+  for(row=0;row<8;row++){
+    for(col=0;col<8;col++){
+      _dst[col]=(_src1[col]+_src2[col])>>1;
+    }
+    _dst+=_ystride;
+    _src1+=_ystride;
+    _src2+=_ystride;
+  }
+}
+
+/*Dispatches to the frag_recon_intra routine installed in the encoder's
+   vtable.*/
+void oc_enc_frag_recon_intra(const oc_enc_ctx *_enc,
+ unsigned char *_dst,int _ystride,const ogg_int16_t _residue[64]){
+  _enc->opt_vtable.frag_recon_intra(_dst,_ystride,_residue);
+}
+
+/*Dispatches to the frag_recon_inter routine installed in the encoder's
+   vtable.*/
+void oc_enc_frag_recon_inter(const oc_enc_ctx *_enc,unsigned char *_dst,
+ const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){
+  _enc->opt_vtable.frag_recon_inter(_dst,_src,_ystride,_residue);
+}

+ 121 - 0
Engine/lib/libtheora/lib/encinfo.c

@@ -0,0 +1,121 @@
+#include <stdlib.h>
+#include <string.h>
+#include "internal.h"
+#include "enquant.h"
+#include "huffenc.h"
+
+
+
+/*Appends a run of bytes to a pack buffer, writing each one as an 8-bit value
+   with oggpackB_write().
+  _opb: The pack buffer that receives the bytes.
+  _buf: The bytes to append.
+  _len: How many bytes of _buf to append; nothing is written if it is not
+         positive.*/
+static void oc_pack_octets(oggpack_buffer *_opb,const char *_buf,int _len){
+  int pos;
+  pos=0;
+  while(pos<_len){
+    oggpackB_write(_opb,_buf[pos],8);
+    pos++;
+  }
+}
+
+
+
+/*Writes the header packet selected by *_packet_state into _opb and describes
+   it in _op.
+  _state:        Supplies the stream parameters; required for the info header.
+  _packet_state: Selects the header to write (OC_PACKET_INFO_HDR,
+                  OC_PACKET_COMMENT_HDR or OC_PACKET_SETUP_HDR), and is
+                  incremented after a header has been written.
+  _opb:          The pack buffer used to assemble the packet.
+  _qinfo:        The quantizer tables; required for the setup header.
+  _codes:        The Huffman codes; required for the setup header.
+  _vendor:       The vendor string; required for the comment header.
+  _tc:           The user comments; required for the comment header.
+  _op:           Receives the packet; its data is owned by _opb.
+  Return: 0 if *_packet_state does not select a header, TH_EFAULT if _op or
+   an input required by the selected header is NULL or if the pack buffer has
+   no data, a negative value from oc_huff_codes_pack() if the codes could not
+   be packed, or otherwise the incremented *_packet_state plus 3.*/
+int oc_state_flushheader(oc_theora_state *_state,int *_packet_state,
+ oggpack_buffer *_opb,const th_quant_info *_qinfo,
+ const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS],
+ const char *_vendor,th_comment *_tc,ogg_packet *_op){
+  unsigned char *packet;
+  int            b_o_s;
+  if(_op==NULL)return TH_EFAULT;
+  switch(*_packet_state){
+    /*Codec info header.*/
+    case OC_PACKET_INFO_HDR:{
+      if(_state==NULL)return TH_EFAULT;
+      oggpackB_reset(_opb);
+      /*Mark this packet as the info header.*/
+      oggpackB_write(_opb,0x80,8);
+      /*Write the codec string.*/
+      oc_pack_octets(_opb,"theora",6);
+      /*Write the codec bitstream version.*/
+      oggpackB_write(_opb,TH_VERSION_MAJOR,8);
+      oggpackB_write(_opb,TH_VERSION_MINOR,8);
+      oggpackB_write(_opb,TH_VERSION_SUB,8);
+      /*Describe the encoded frame.*/
+      oggpackB_write(_opb,_state->info.frame_width>>4,16);
+      oggpackB_write(_opb,_state->info.frame_height>>4,16);
+      oggpackB_write(_opb,_state->info.pic_width,24);
+      oggpackB_write(_opb,_state->info.pic_height,24);
+      oggpackB_write(_opb,_state->info.pic_x,8);
+      oggpackB_write(_opb,_state->info.pic_y,8);
+      oggpackB_write(_opb,_state->info.fps_numerator,32);
+      oggpackB_write(_opb,_state->info.fps_denominator,32);
+      oggpackB_write(_opb,_state->info.aspect_numerator,24);
+      oggpackB_write(_opb,_state->info.aspect_denominator,24);
+      oggpackB_write(_opb,_state->info.colorspace,8);
+      oggpackB_write(_opb,_state->info.target_bitrate,24);
+      oggpackB_write(_opb,_state->info.quality,6);
+      oggpackB_write(_opb,_state->info.keyframe_granule_shift,5);
+      oggpackB_write(_opb,_state->info.pixel_fmt,2);
+      /*Spare configuration bits.*/
+      oggpackB_write(_opb,0,3);
+      b_o_s=1;
+    }break;
+    /*Comment header.*/
+    case OC_PACKET_COMMENT_HDR:{
+      int vendor_len;
+      int i;
+      /*Both the comments and the vendor string are read below; reject NULL
+         for either instead of handing a NULL pointer to strlen().*/
+      if(_tc==NULL||_vendor==NULL)return TH_EFAULT;
+      vendor_len=strlen(_vendor);
+      oggpackB_reset(_opb);
+      /*Mark this packet as the comment header.*/
+      oggpackB_write(_opb,0x81,8);
+      /*Write the codec string.*/
+      oc_pack_octets(_opb,"theora",6);
+      /*Write the vendor string.
+        The 32-bit lengths and counts in this header are written with
+         oggpack_write(), not oggpackB_write().*/
+      oggpack_write(_opb,vendor_len,32);
+      oc_pack_octets(_opb,_vendor,vendor_len);
+      oggpack_write(_opb,_tc->comments,32);
+      for(i=0;i<_tc->comments;i++){
+        if(_tc->user_comments[i]!=NULL){
+          oggpack_write(_opb,_tc->comment_lengths[i],32);
+          oc_pack_octets(_opb,_tc->user_comments[i],_tc->comment_lengths[i]);
+        }
+        /*A missing comment is written as an empty one.*/
+        else oggpack_write(_opb,0,32);
+      }
+      b_o_s=0;
+    }break;
+    /*Codec setup header.*/
+    case OC_PACKET_SETUP_HDR:{
+      int ret;
+      /*Callers that only want the comment header pass NULL for the tables, so
+         make sure they are actually there before packing them.*/
+      if(_qinfo==NULL||_codes==NULL)return TH_EFAULT;
+      oggpackB_reset(_opb);
+      /*Mark this packet as the setup header.*/
+      oggpackB_write(_opb,0x82,8);
+      /*Write the codec string.*/
+      oc_pack_octets(_opb,"theora",6);
+      /*Write the quantizer tables.*/
+      oc_quant_params_pack(_opb,_qinfo);
+      /*Write the huffman codes.*/
+      ret=oc_huff_codes_pack(_opb,_codes);
+      /*This should never happen, because we validate the tables when they
+         are set.
+        If you see this, there's a good chance memory is being corrupted.*/
+      if(ret<0)return ret;
+      b_o_s=0;
+    }break;
+    /*No more headers to emit.*/
+    default:return 0;
+  }
+  /*This is kind of fugly: we hand the user a buffer which they do not own.
+    We will overwrite it when the next packet is output, so the user better be
+     done with it by then.
+    Vorbis is little better: it hands back buffers that it will free the next
+     time the headers are requested, or when the encoder is cleared.
+    Hopefully libogg2 will make this much cleaner.*/
+  packet=oggpackB_get_buffer(_opb);
+  /*If there's no packet, malloc failed while writing.*/
+  if(packet==NULL)return TH_EFAULT;
+  _op->packet=packet;
+  _op->bytes=oggpackB_bytes(_opb);
+  _op->b_o_s=b_o_s;
+  _op->e_o_s=0;
+  _op->granulepos=0;
+  _op->packetno=*_packet_state+3;
+  return ++(*_packet_state)+3;
+}

+ 493 - 0
Engine/lib/libtheora/lib/encint.h

@@ -0,0 +1,493 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: encint.h 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+#if !defined(_encint_H)
+# define _encint_H (1)
+# if defined(HAVE_CONFIG_H)
+#  include "config.h"
+# endif
+# include "theora/theoraenc.h"
+# include "internal.h"
+# include "ocintrin.h"
+# include "mathops.h"
+# include "enquant.h"
+# include "huffenc.h"
+/*# define OC_COLLECT_METRICS*/
+
+
+
+/*A pair of motion vectors.*/
+typedef oc_mv                         oc_mv2[2];
+
+/*Short names for the encoder's structure types.
+  oc_enc_ctx is the internal name for struct th_enc_ctx.*/
+typedef struct oc_enc_opt_vtable      oc_enc_opt_vtable;
+typedef struct oc_mb_enc_info         oc_mb_enc_info;
+typedef struct oc_mode_scheme_chooser oc_mode_scheme_chooser;
+typedef struct oc_iir_filter          oc_iir_filter;
+typedef struct oc_frame_metrics       oc_frame_metrics;
+typedef struct oc_rc_state            oc_rc_state;
+typedef struct th_enc_ctx             oc_enc_ctx;
+typedef struct oc_token_checkpoint    oc_token_checkpoint;
+
+
+
+/*Constants for the packet-out state machine specific to the encoder.
+  These are the non-negative values of th_enc_ctx.packet_state.*/
+
+/*Next packet to emit: Data packet, but none are ready yet.*/
+#define OC_PACKET_EMPTY (0)
+/*Next packet to emit: Data packet, and one is ready.*/
+#define OC_PACKET_READY (1)
+
+/*Encoder speed levels.*/
+
+/*All features enabled.*/
+#define OC_SP_LEVEL_SLOW       (0)
+/*Enable early skip.*/
+#define OC_SP_LEVEL_EARLY_SKIP (1)
+/*Disable motion compensation.*/
+#define OC_SP_LEVEL_NOMC       (2)
+/*Maximum valid speed level.*/
+#define OC_SP_LEVEL_MAX        (2)
+
+
+/*The bits used for each of the MB mode codebooks.*/
+extern const unsigned char OC_MODE_BITS[2][OC_NMODES];
+
+/*The bits used for each of the MV codebooks.*/
+extern const unsigned char OC_MV_BITS[2][64];
+
+/*The minimum value that can be stored in a SB run for each codeword.
+  The last entry is the upper bound on the length of a single SB run, which is
+   why this table has one more entry than OC_SB_RUN_CODE_NBITS.*/
+extern const ogg_uint16_t  OC_SB_RUN_VAL_MIN[8];
+/*The bits used for each SB run codeword.*/
+extern const unsigned char OC_SB_RUN_CODE_NBITS[7];
+
+/*The bits used for each block run length (starting with 1).*/
+extern const unsigned char OC_BLOCK_RUN_CODE_NBITS[30];
+
+
+
+/*Encoder specific functions with accelerated variants.
+  Each member except fdct8x8 is called through the oc_enc_frag_*() wrapper of
+   the same name in encfrag.c, which passes its arguments through unchanged
+   (minus the encoder context).*/
+struct oc_enc_opt_vtable{
+  /*Sum of absolute differences between two 8x8 blocks.*/
+  unsigned (*frag_sad)(const unsigned char *_src,
+   const unsigned char *_ref,int _ystride);
+  /*As frag_sad, but may stop early once the sum exceeds _thresh.*/
+  unsigned (*frag_sad_thresh)(const unsigned char *_src,
+   const unsigned char *_ref,int _ystride,unsigned _thresh);
+  /*As frag_sad_thresh, against the average of two reference blocks.*/
+  unsigned (*frag_sad2_thresh)(const unsigned char *_src,
+   const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+   unsigned _thresh);
+  /*Hadamard-transformed absolute difference, with early termination.*/
+  unsigned (*frag_satd_thresh)(const unsigned char *_src,
+   const unsigned char *_ref,int _ystride,unsigned _thresh);
+  /*As frag_satd_thresh, against the average of two reference blocks.*/
+  unsigned (*frag_satd2_thresh)(const unsigned char *_src,
+   const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+   unsigned _thresh);
+  /*Hadamard-transformed absolute sum of a single block, excluding DC.*/
+  unsigned (*frag_intra_satd)(const unsigned char *_src,int _ystride);
+  /*Per-pixel difference of two 8x8 blocks.*/
+  void     (*frag_sub)(ogg_int16_t _diff[64],const unsigned char *_src,
+   const unsigned char *_ref,int _ystride);
+  /*Per-pixel difference of an 8x8 block and the constant 128.*/
+  void     (*frag_sub_128)(ogg_int16_t _diff[64],
+   const unsigned char *_src,int _ystride);
+  /*Per-pixel average of two 8x8 blocks.*/
+  void     (*frag_copy2)(unsigned char *_dst,
+   const unsigned char *_src1,const unsigned char *_src2,int _ystride);
+  /*NOTE(review): presumably reconstructs a block from a residue alone; no
+     implementation is visible next to the wrapper in encfrag.c.*/
+  void     (*frag_recon_intra)(unsigned char *_dst,int _ystride,
+   const ogg_int16_t _residue[64]);
+  /*NOTE(review): presumably reconstructs a block from a predictor plus a
+     residue; no implementation is visible next to the wrapper in encfrag.c.*/
+  void     (*frag_recon_inter)(unsigned char *_dst,
+   const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
+  /*NOTE(review): presumably the forward 8x8 DCT; confirm against its
+     implementation.*/
+  void     (*fdct8x8)(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
+};
+
+
+/*NOTE(review): presumably fills in _enc->opt_vtable; the definition is not
+   part of this header.*/
+void oc_enc_vtable_init(oc_enc_ctx *_enc);
+
+
+
+/*Encoder-specific macroblock information.
+  The encoder context keeps an array of these in th_enc_ctx.mb_info.*/
+struct oc_mb_enc_info{
+  /*Neighboring macro blocks that have MVs available from the current frame.*/
+  unsigned      cneighbors[4];
+  /*Neighboring macro blocks to use for MVs from the previous frame.*/
+  unsigned      pneighbors[4];
+  /*The number of current-frame neighbors (valid entries in cneighbors).*/
+  unsigned char ncneighbors;
+  /*The number of previous-frame neighbors (valid entries in pneighbors).*/
+  unsigned char npneighbors;
+  /*Flags indicating which MB modes have been refined.*/
+  unsigned char refined;
+  /*Motion vectors for a macro block for the current frame and the
+     previous two frames.
+    Each is a set of 2 vectors against OC_FRAME_GOLD and OC_FRAME_PREV, which
+     can be used to estimate constant velocity and constant acceleration
+     predictors.
+    Uninitialized MVs are (0,0).*/
+  oc_mv2        analysis_mv[3];
+  /*Current unrefined analysis MVs.*/
+  oc_mv         unref_mv[2];
+  /*Unrefined block MVs.*/
+  oc_mv         block_mv[4];
+  /*Refined block MVs.*/
+  oc_mv         ref_mv[4];
+  /*Minimum motion estimation error from the analysis stage.*/
+  ogg_uint16_t  error[2];
+  /*MB error for half-pel refinement for each frame type.*/
+  unsigned      satd[2];
+  /*Block error for half-pel refinement.*/
+  unsigned      block_satd[4];
+};
+
+
+
+/*State machine to estimate the opportunity cost of coding a MB mode.*/
+struct oc_mode_scheme_chooser{
+  /*Pointers to a list containing the index of each mode in the mode
+     alphabet used by each scheme.
+    The first entry points to the dynamic scheme0_ranks, while the remaining 7
+     point to the constant entries stored in OC_MODE_SCHEMES.*/
+  const unsigned char *mode_ranks[8];
+  /*The ranks for each mode when coded with scheme 0.
+    These are optimized so that the more frequent modes have lower ranks.*/
+  unsigned char        scheme0_ranks[OC_NMODES];
+  /*The list of modes, sorted in descending order of frequency, that
+    corresponds to the ranks above.*/
+  unsigned char        scheme0_list[OC_NMODES];
+  /*The number of times each mode has been chosen so far.*/
+  int                  mode_counts[OC_NMODES];
+  /*The list of mode coding schemes, sorted in ascending order of bit cost.*/
+  unsigned char        scheme_list[8];
+  /*The number of bits used by each mode coding scheme.*/
+  ptrdiff_t            scheme_bits[8];
+};
+
+
+/*Initializes a mode scheme chooser; the definition is not part of this
+   header.*/
+void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser);
+
+
+
+/*A 2nd order low-pass Bessel follower.
+  We use this for rate control because it has fast reaction time, but is
+   critically damped.
+  NOTE(review): the fields are not documented here; presumably c holds the
+   filter coefficients, g the gain, and x and y the two most recent inputs and
+   outputs.
+  Confirm against the rate control code.*/
+struct oc_iir_filter{
+  ogg_int32_t c[2];
+  ogg_int64_t g;
+  ogg_int32_t x[2];
+  ogg_int32_t y[2];
+};
+
+
+
+/*The 2-pass metrics associated with a single frame.
+  dup_count and frame_type are bit-fields that together occupy 32 bits.*/
+struct oc_frame_metrics{
+  /*The log base 2 of the scale factor for this frame in Q24 format.*/
+  ogg_int32_t   log_scale;
+  /*The number of application-requested duplicates of this frame.*/
+  unsigned      dup_count:31;
+  /*The frame type from pass 1.*/
+  unsigned      frame_type:1;
+};
+
+
+
+/*Rate control state information.*/
+struct oc_rc_state{
+  /*The target average bits per frame.*/
+  ogg_int64_t        bits_per_frame;
+  /*The current buffer fullness (bits available to be used).*/
+  ogg_int64_t        fullness;
+  /*The target buffer fullness.
+    This is where we'd like to be by the last keyframe the appears in the next
+     buf_delay frames.*/
+  ogg_int64_t        target;
+  /*The maximum buffer fullness (total size of the buffer).*/
+  ogg_int64_t        max;
+  /*The log of the number of pixels in a frame in Q57 format.*/
+  ogg_int64_t        log_npixels;
+  /*The exponent used in the rate model in Q8 format.*/
+  unsigned           exp[2];
+  /*The number of frames to distribute the buffer usage over.*/
+  int                buf_delay;
+  /*The total drop count from the previous frame.
+    This includes duplicates explicitly requested via the
+     TH_ENCCTL_SET_DUP_COUNT API as well as frames we chose to drop ourselves.*/
+  ogg_uint32_t       prev_drop_count;
+  /*The log of an estimated scale factor used to obtain the real framerate, for
+     VFR sources or, e.g., 12 fps content doubled to 24 fps, etc.*/
+  ogg_int64_t        log_drop_scale;
+  /*The log of estimated scale factor for the rate model in Q57 format.*/
+  ogg_int64_t        log_scale[2];
+  /*The log of the target quantizer level in Q57 format.*/
+  ogg_int64_t        log_qtarget;
+  /*Will we drop frames to meet bitrate target?*/
+  unsigned char      drop_frames;
+  /*Do we respect the maximum buffer fullness?*/
+  unsigned char      cap_overflow;
+  /*Can the reservoir go negative?*/
+  unsigned char      cap_underflow;
+  /*Second-order lowpass filters to track scale and VFR.*/
+  oc_iir_filter      scalefilter[2];
+  int                inter_count;
+  int                inter_delay;
+  int                inter_delay_target;
+  oc_iir_filter      vfrfilter;
+  /*Two-pass mode state.
+    0 => 1-pass encoding.
+    1 => 1st pass of 2-pass encoding.
+    2 => 2nd pass of 2-pass encoding.*/
+  int                twopass;
+  /*Buffer for current frame metrics.*/
+  unsigned char      twopass_buffer[48];
+  /*The number of bytes in the frame metrics buffer.
+    When 2-pass encoding is enabled, this is set to 0 after each frame is
+     submitted, and must be non-zero before the next frame will be accepted.*/
+  int                twopass_buffer_bytes;
+  int                twopass_buffer_fill;
+  /*Whether or not to force the next frame to be a keyframe.*/
+  unsigned char      twopass_force_kf;
+  /*The metrics for the previous frame.*/
+  oc_frame_metrics   prev_metrics;
+  /*The metrics for the current frame.*/
+  oc_frame_metrics   cur_metrics;
+  /*The buffered metrics for future frames.*/
+  oc_frame_metrics  *frame_metrics;
+  int                nframe_metrics;
+  int                cframe_metrics;
+  /*The index of the current frame in the circular metric buffer.*/
+  int                frame_metrics_head;
+  /*The frame count of each type (keyframes, delta frames, and dup frames);
+     32 bits limits us to 2.268 years at 60 fps.*/
+  ogg_uint32_t       frames_total[3];
+  /*The number of frames of each type yet to be processed.*/
+  ogg_uint32_t       frames_left[3];
+  /*The sum of the scale values for each frame type.*/
+  ogg_int64_t        scale_sum[2];
+  /*The start of the window over which the current scale sums are taken.*/
+  int                scale_window0;
+  /*The end of the window over which the current scale sums are taken.*/
+  int                scale_window_end;
+  /*The frame count of each type in the current 2-pass window; this does not
+     include dup frames.*/
+  int                nframes[3];
+  /*The total accumulated estimation bias.*/
+  ogg_int64_t        rate_bias;
+};
+
+/*Rate control state set-up and tear-down.
+  NOTE(review): only the prototypes are visible here, so this summary is
+   inferred from the names; confirm against the definitions.*/
+void oc_rc_state_init(oc_rc_state *_rc,oc_enc_ctx *_enc);
+void oc_rc_state_clear(oc_rc_state *_rc);
+/*Encoder-level rate control entry points, including the 2-pass data
+   exchange routines (oc_enc_rc_2pass_out() and oc_enc_rc_2pass_in()).
+  NOTE(review): the meaning of the return values and of _clamp, _trial and
+   _droppable cannot be determined from these declarations alone.*/
+void oc_enc_rc_resize(oc_enc_ctx *_enc);
+int oc_enc_select_qi(oc_enc_ctx *_enc,int _qti,int _clamp);
+void oc_enc_calc_lambda(oc_enc_ctx *_enc,int _frame_type);
+int oc_enc_update_rc_state(oc_enc_ctx *_enc,
+ long _bits,int _qti,int _qi,int _trial,int _droppable);
+int oc_enc_rc_2pass_out(oc_enc_ctx *_enc,unsigned char **_buf);
+int oc_enc_rc_2pass_in(oc_enc_ctx *_enc,unsigned char *_buf,size_t _bytes);
+
+
+
+/*The internal encoder state.
+  The functions declared in this header take it as an oc_enc_ctx pointer.
+  NOTE(review): the typedef tying oc_enc_ctx to struct th_enc_ctx is not
+   visible in this part of the file; confirm where it is declared.*/
+struct th_enc_ctx{
+  /*Shared encoder/decoder state.*/
+  oc_theora_state          state;
+  /*Buffer in which to assemble packets.*/
+  oggpack_buffer           opb;
+  /*Encoder-specific macroblock information.*/
+  oc_mb_enc_info          *mb_info;
+  /*DC coefficients after prediction.*/
+  ogg_int16_t             *frag_dc;
+  /*The list of coded macro blocks, in coded order.*/
+  unsigned                *coded_mbis;
+  /*The number of coded macro blocks.*/
+  size_t                   ncoded_mbis;
+  /*Whether or not packets are ready to be emitted.
+    This takes on negative values while there are remaining header packets to
+     be emitted, reaches 0 when the codec is ready for input, and becomes
+     positive when a frame has been processed and data packets are ready.*/
+  int                      packet_state;
+  /*The maximum distance between keyframes.*/
+  ogg_uint32_t             keyframe_frequency_force;
+  /*The number of duplicates to produce for the next frame.*/
+  ogg_uint32_t             dup_count;
+  /*The number of duplicates remaining to be emitted for the current frame.*/
+  ogg_uint32_t             nqueued_dups;
+  /*The number of duplicates emitted for the last frame.*/
+  ogg_uint32_t             prev_dup_count;
+  /*The current speed level.*/
+  int                      sp_level;
+  /*Whether or not VP3 compatibility mode has been enabled.*/
+  unsigned char            vp3_compatible;
+  /*Whether or not any INTER frames have been coded.*/
+  unsigned char            coded_inter_frame;
+  /*Whether or not previous frame was dropped.*/
+  unsigned char            prevframe_dropped;
+  /*Stores most recently chosen Huffman tables for each frame type, DC and AC
+     coefficients, and luma and chroma tokens.
+    The actual Huffman table used for a given coefficient depends not only on
+     the choice made here, but also its index in the zig-zag ordering.*/
+  unsigned char            huff_idxs[2][2][2];
+  /*Current count of bits used by each MV coding mode.*/
+  size_t                   mv_bits[2];
+  /*The mode scheme chooser for estimating mode coding costs.*/
+  oc_mode_scheme_chooser   chooser;
+  /*The number of vertical super blocks in an MCU.*/
+  int                      mcu_nvsbs;
+  /*The SSD error for skipping each fragment in the current MCU.*/
+  unsigned                *mcu_skip_ssd;
+  /*The DCT token lists for each coefficient and each plane.*/
+  unsigned char          **dct_tokens[3];
+  /*The extra bits associated with each DCT token.*/
+  ogg_uint16_t           **extra_bits[3];
+  /*The number of DCT tokens for each coefficient for each plane.*/
+  ptrdiff_t                ndct_tokens[3][64];
+  /*Pending EOB runs for each coefficient for each plane.*/
+  ogg_uint16_t             eob_run[3][64];
+  /*The offset of the first DCT token for each coefficient for each plane.*/
+  unsigned char            dct_token_offs[3][64];
+  /*The last DC coefficient for each plane and reference frame.*/
+  int                      dc_pred_last[3][3];
+#if defined(OC_COLLECT_METRICS)
+  /*Fragment SATD statistics for MB mode estimation metrics.*/
+  unsigned                *frag_satd;
+  /*Fragment SSD statistics for MB mode estimation metrics.*/
+  unsigned                *frag_ssd;
+#endif
+  /*The R-D optimization parameter.*/
+  int                      lambda;
+  /*The huffman tables in use.*/
+  th_huff_code             huff_codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS];
+  /*The quantization parameters in use.
+    NOTE(review): enquant_tables and enquant_table_data below are both
+     dimensioned [64][3][2]; presumably that is qi, color plane and quantizer
+     type, with the pointers referring into the data array -- confirm against
+     the code that fills them in.*/
+  th_quant_info            qinfo;
+  oc_iquant               *enquant_tables[64][3][2];
+  oc_iquant_table          enquant_table_data[64][3][2];
+  /*An "average" quantizer for each quantizer type (INTRA or INTER) and qi
+     value.
+    This is used to parameterize the rate control decisions.
+    They are kept in the log domain to simplify later processing.
+    Keep in mind these are DCT domain quantizers, and so are scaled by an
+     additional factor of 4 from the pixel domain.*/
+  ogg_int64_t              log_qavg[2][64];
+  /*The buffer state used to drive rate control.*/
+  oc_rc_state              rc;
+  /*Table for encoder acceleration functions.*/
+  oc_enc_opt_vtable        opt_vtable;
+};
+
+/*Frame analysis entry points for INTRA and INTER frames.
+  The two metrics routines are only declared when OC_COLLECT_METRICS is
+   defined.
+  NOTE(review): the meaning of _recode, _allow_keyframe and of the value
+   returned by oc_enc_analyze_inter() is not visible from these declarations;
+   confirm against the definitions.*/
+void oc_enc_analyze_intra(oc_enc_ctx *_enc,int _recode);
+int oc_enc_analyze_inter(oc_enc_ctx *_enc,int _allow_keyframe,int _recode);
+#if defined(OC_COLLECT_METRICS)
+void oc_enc_mode_metrics_collect(oc_enc_ctx *_enc);
+void oc_enc_mode_metrics_dump(oc_enc_ctx *_enc);
+#endif
+
+
+
+/*Perform fullpel motion search for a single MB against both reference frames.
+  _mbi: The index of the macro block to search for.*/
+void oc_mcenc_search(oc_enc_ctx *_enc,int _mbi);
+/*Refine a MB MV for one frame.
+  _mbi:   The index of the macro block.
+  _frame: Selects the frame to refine against.*/
+void oc_mcenc_refine1mv(oc_enc_ctx *_enc,int _mbi,int _frame);
+/*Refine the block MVs.
+  _mbi: The index of the macro block.*/
+void oc_mcenc_refine4mv(oc_enc_ctx *_enc,int _mbi);
+
+
+
+/*Used to roll back a tokenlog transaction when we retroactively decide to
+   skip a fragment.
+  A checkpoint is taken right before each token is added.
+  Each one records which token list (plane and zig-zag index) was about to be
+   modified, along with that list's EOB run and token count at that moment.*/
+struct oc_token_checkpoint{
+  /*The color plane the token was added to.*/
+  unsigned char pli;
+  /*The zig-zag index the token was added to.*/
+  unsigned char zzi;
+  /*The outstanding EOB run count before the token was added.*/
+  ogg_uint16_t  eob_run;
+  /*The token count before the token was added.*/
+  ptrdiff_t     ndct_tokens;
+};
+
+
+/*Tokenizer interface.
+  oc_enc_tokenlog_rollback() takes an array of _n oc_token_checkpoint entries,
+   and oc_enc_tokenize_ac() takes a pointer to a checkpoint stack pointer.
+  NOTE(review): presumably oc_enc_tokenize_ac() appends the checkpoints it
+   takes to *_stack and advances it; the remaining parameter and return value
+   semantics are not visible here -- confirm against the definitions.*/
+void oc_enc_tokenize_start(oc_enc_ctx *_enc);
+int oc_enc_tokenize_ac(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi,
+ ogg_int16_t *_qdct,const ogg_uint16_t *_dequant,const ogg_int16_t *_dct,
+ int _zzi,oc_token_checkpoint **_stack,int _acmin);
+void oc_enc_tokenlog_rollback(oc_enc_ctx *_enc,
+ const oc_token_checkpoint *_stack,int _n);
+void oc_enc_pred_dc_frag_rows(oc_enc_ctx *_enc,
+ int _pli,int _fragy0,int _frag_yend);
+void oc_enc_tokenize_dc_frag_list(oc_enc_ctx *_enc,int _pli,
+ const ptrdiff_t *_coded_fragis,ptrdiff_t _ncoded_fragis,
+ int _prev_ndct_tokens1,int _prev_eob_run1);
+void oc_enc_tokenize_finish(oc_enc_ctx *_enc);
+
+
+
+/*Utility routine to encode one of the header packets.
+  It takes the shared state, a pointer to the caller's packet state, the
+   buffer to assemble the packet in, the quantization parameters and Huffman
+   codes, a vendor string, the comments, and the packet to fill in.
+  NOTE(review): presumably *_packet_state selects which header is produced
+   and is advanced on success (compare th_enc_ctx::packet_state); the return
+   value convention is not visible here -- confirm against the definition.*/
+int oc_state_flushheader(oc_theora_state *_state,int *_packet_state,
+ oggpack_buffer *_opb,const th_quant_info *_qinfo,
+ const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS],
+ const char *_vendor,th_comment *_tc,ogg_packet *_op);
+
+
+
+/*Encoder-specific accelerated functions.
+  Every function in this group takes the encoder context as its first
+   argument.
+  NOTE(review): presumably each one dispatches through
+   th_enc_ctx::opt_vtable to either the pure-C version declared further down
+   or a platform-specific one; confirm against the definitions.*/
+void oc_enc_frag_sub(const oc_enc_ctx *_enc,ogg_int16_t _diff[64],
+ const unsigned char *_src,const unsigned char *_ref,int _ystride);
+void oc_enc_frag_sub_128(const oc_enc_ctx *_enc,ogg_int16_t _diff[64],
+ const unsigned char *_src,int _ystride);
+unsigned oc_enc_frag_sad(const oc_enc_ctx *_enc,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_sad_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride,
+ unsigned _thresh);
+unsigned oc_enc_frag_sad2_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref1,
+ const unsigned char *_ref2,int _ystride,unsigned _thresh);
+unsigned oc_enc_frag_satd_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride,
+ unsigned _thresh);
+unsigned oc_enc_frag_satd2_thresh(const oc_enc_ctx *_enc,
+ const unsigned char *_src,const unsigned char *_ref1,
+ const unsigned char *_ref2,int _ystride,unsigned _thresh);
+unsigned oc_enc_frag_intra_satd(const oc_enc_ctx *_enc,
+ const unsigned char *_src,int _ystride);
+void oc_enc_frag_copy2(const oc_enc_ctx *_enc,unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride);
+void oc_enc_frag_recon_intra(const oc_enc_ctx *_enc,
+ unsigned char *_dst,int _ystride,const ogg_int16_t _residue[64]);
+void oc_enc_frag_recon_inter(const oc_enc_ctx *_enc,unsigned char *_dst,
+ const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
+void oc_enc_fdct8x8(const oc_enc_ctx *_enc,ogg_int16_t _y[64],
+ const ogg_int16_t _x[64]);
+
+/*Default pure-C implementations.
+  Apart from oc_enc_vtable_init_c(), these have the same parameters as the
+   like-named functions above, minus the leading encoder context.
+  No _c variants of oc_enc_frag_recon_intra() or oc_enc_frag_recon_inter()
+   are declared in this group.*/
+void oc_enc_vtable_init_c(oc_enc_ctx *_enc);
+
+void oc_enc_frag_sub_c(ogg_int16_t _diff[64],
+ const unsigned char *_src,const unsigned char *_ref,int _ystride);
+void oc_enc_frag_sub_128_c(ogg_int16_t _diff[64],
+ const unsigned char *_src,int _ystride);
+void oc_enc_frag_copy2_c(unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride);
+unsigned oc_enc_frag_sad_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_sad_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh);
+unsigned oc_enc_frag_sad2_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh);
+unsigned oc_enc_frag_satd_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh);
+unsigned oc_enc_frag_satd2_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh);
+unsigned oc_enc_frag_intra_satd_c(const unsigned char *_src,int _ystride);
+void oc_enc_fdct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
+
+#endif

+ 1615 - 0
Engine/lib/libtheora/lib/encode.c

@@ -0,0 +1,1615 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: encode.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+#include <stdlib.h>
+#include <string.h>
+#include "encint.h"
+#if defined(OC_X86_ASM)
+# include "x86/x86enc.h"
+#endif
+
+
+
+/*The default quantization parameters used by VP3.1.
+  There is a single quantizer range (of size 63), and every base matrix list
+   below holds the same 64-entry matrix twice.
+  NOTE(review): presumably the two copies are the two endpoints of that one
+   range, so the base matrix does not vary with qi; confirm against the
+   th_quant_ranges documentation.*/
+static const int OC_VP31_RANGE_SIZES[1]={63};
+static const th_quant_base OC_VP31_BASES_INTRA_Y[2]={
+  {
+     16, 11, 10, 16, 24, 40, 51, 61,
+     12, 12, 14, 19, 26, 58, 60, 55,
+     14, 13, 16, 24, 40, 57, 69, 56,
+     14, 17, 22, 29, 51, 87, 80, 62,
+     18, 22, 37, 58, 68,109,103, 77,
+     24, 35, 55, 64, 81,104,113, 92,
+     49, 64, 78, 87,103,121,120,101,
+     72, 92, 95, 98,112,100,103, 99
+  },
+  {
+     16, 11, 10, 16, 24, 40, 51, 61,
+     12, 12, 14, 19, 26, 58, 60, 55,
+     14, 13, 16, 24, 40, 57, 69, 56,
+     14, 17, 22, 29, 51, 87, 80, 62,
+     18, 22, 37, 58, 68,109,103, 77,
+     24, 35, 55, 64, 81,104,113, 92,
+     49, 64, 78, 87,103,121,120,101,
+     72, 92, 95, 98,112,100,103, 99
+  }
+};
+static const th_quant_base OC_VP31_BASES_INTRA_C[2]={
+  {
+     17, 18, 24, 47, 99, 99, 99, 99,
+     18, 21, 26, 66, 99, 99, 99, 99,
+     24, 26, 56, 99, 99, 99, 99, 99,
+     47, 66, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99
+  },
+  {
+     17, 18, 24, 47, 99, 99, 99, 99,
+     18, 21, 26, 66, 99, 99, 99, 99,
+     24, 26, 56, 99, 99, 99, 99, 99,
+     47, 66, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99
+  }
+};
+static const th_quant_base OC_VP31_BASES_INTER[2]={
+  {
+     16, 16, 16, 20, 24, 28, 32, 40,
+     16, 16, 20, 24, 28, 32, 40, 48,
+     16, 20, 24, 28, 32, 40, 48, 64,
+     20, 24, 28, 32, 40, 48, 64, 64,
+     24, 28, 32, 40, 48, 64, 64, 64,
+     28, 32, 40, 48, 64, 64, 64, 96,
+     32, 40, 48, 64, 64, 64, 96,128,
+     40, 48, 64, 64, 64, 96,128,128
+  },
+  {
+     16, 16, 16, 20, 24, 28, 32, 40,
+     16, 16, 20, 24, 28, 32, 40, 48,
+     16, 20, 24, 28, 32, 40, 48, 64,
+     20, 24, 28, 32, 40, 48, 64, 64,
+     24, 28, 32, 40, 48, 64, 64, 64,
+     28, 32, 40, 48, 64, 64, 64, 96,
+     32, 40, 48, 64, 64, 64, 96,128,
+     40, 48, 64, 64, 64, 96,128,128
+  }
+};
+/*The complete VP3.1 quantization parameter set: three 64-entry tables,
+   followed by the range count, range sizes and base matrix list for each of
+   2x3 slots.
+  All three slots in the second group share the INTER base matrices, and the
+   last two slots of the first group share the INTRA_C base matrices.
+  NOTE(review): the th_quant_info field names are not visible here;
+   presumably the tables are the DC scale, AC scale and loop filter limits,
+   and the slots are [INTRA,INTER] x [Y',Cb,Cr] -- confirm against
+   theora/codec.h.*/
+const th_quant_info TH_VP31_QUANT_INFO={
+  {
+    220,200,190,180,170,170,160,160,
+    150,150,140,140,130,130,120,120,
+    110,110,100,100, 90, 90, 90, 80,
+     80, 80, 70, 70, 70, 60, 60, 60,
+     60, 50, 50, 50, 50, 40, 40, 40,
+     40, 40, 30, 30, 30, 30, 30, 30,
+     30, 20, 20, 20, 20, 20, 20, 20,
+     20, 10, 10, 10, 10, 10, 10, 10
+  },
+  {
+    500,450,400,370,340,310,285,265,
+    245,225,210,195,185,180,170,160,
+    150,145,135,130,125,115,110,107,
+    100, 96, 93, 89, 85, 82, 75, 74,
+     70, 68, 64, 60, 57, 56, 52, 50,
+     49, 45, 44, 43, 40, 38, 37, 35,
+     33, 32, 30, 29, 28, 25, 24, 22,
+     21, 19, 18, 17, 15, 13, 12, 10
+  },
+  {
+    30,25,20,20,15,15,14,14,
+    13,13,12,12,11,11,10,10,
+     9, 9, 8, 8, 7, 7, 7, 7,
+     6, 6, 6, 6, 5, 5, 5, 5,
+     4, 4, 4, 4, 3, 3, 3, 3,
+     2, 2, 2, 2, 2, 2, 2, 2,
+     0, 0, 0, 0, 0, 0, 0, 0,
+     0, 0, 0, 0, 0, 0, 0, 0
+  },
+  {
+    {
+      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_Y},
+      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_C},
+      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_C}
+    },
+    {
+      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER},
+      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER},
+      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER}
+    }
+  }
+};
+
+/*The current default quantization parameters.
+  These use three quantizer ranges (of sizes 32, 16 and 15, which sum to 63),
+   and four base matrices per list.
+  The first matrix in each list is flat, and the last one has the same values
+   as the corresponding VP3.1 base matrix.*/
+static const int OC_DEF_QRANGE_SIZES[3]={32,16,15};
+static const th_quant_base OC_DEF_BASES_INTRA_Y[4]={
+  {
+     15, 15, 15, 15, 15, 15, 15, 15,
+     15, 15, 15, 15, 15, 15, 15, 15,
+     15, 15, 15, 15, 15, 15, 15, 15,
+     15, 15, 15, 15, 15, 15, 15, 15,
+     15, 15, 15, 15, 15, 15, 15, 15,
+     15, 15, 15, 15, 15, 15, 15, 15,
+     15, 15, 15, 15, 15, 15, 15, 15,
+     15, 15, 15, 15, 15, 15, 15, 15,
+  },
+  {
+     15, 12, 12, 15, 18, 20, 20, 21,
+     13, 13, 14, 17, 18, 21, 21, 20,
+     14, 14, 15, 18, 20, 21, 21, 21,
+     14, 16, 17, 19, 20, 21, 21, 21,
+     16, 17, 20, 21, 21, 21, 21, 21,
+     18, 19, 20, 21, 21, 21, 21, 21,
+     20, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21
+  },
+  {
+     16, 12, 11, 16, 20, 25, 27, 28,
+     13, 13, 14, 18, 21, 28, 28, 27,
+     14, 13, 16, 20, 25, 28, 28, 28,
+     14, 16, 19, 22, 27, 29, 29, 28,
+     17, 19, 25, 28, 28, 30, 30, 29,
+     20, 24, 27, 28, 29, 30, 30, 29,
+     27, 28, 29, 29, 30, 30, 30, 30,
+     29, 29, 29, 29, 30, 30, 30, 29
+  },
+  {
+     16, 11, 10, 16, 24, 40, 51, 61,
+     12, 12, 14, 19, 26, 58, 60, 55,
+     14, 13, 16, 24, 40, 57, 69, 56,
+     14, 17, 22, 29, 51, 87, 80, 62,
+     18, 22, 37, 58, 68,109,103, 77,
+     24, 35, 55, 64, 81,104,113, 92,
+     49, 64, 78, 87,103,121,120,101,
+     72, 92, 95, 98,112,100,103, 99
+  }
+};
+static const th_quant_base OC_DEF_BASES_INTRA_C[4]={
+  {
+     19, 19, 19, 19, 19, 19, 19, 19,
+     19, 19, 19, 19, 19, 19, 19, 19,
+     19, 19, 19, 19, 19, 19, 19, 19,
+     19, 19, 19, 19, 19, 19, 19, 19,
+     19, 19, 19, 19, 19, 19, 19, 19,
+     19, 19, 19, 19, 19, 19, 19, 19,
+     19, 19, 19, 19, 19, 19, 19, 19,
+     19, 19, 19, 19, 19, 19, 19, 19
+  },
+  {
+     18, 18, 21, 25, 26, 26, 26, 26,
+     18, 20, 22, 26, 26, 26, 26, 26,
+     21, 22, 25, 26, 26, 26, 26, 26,
+     25, 26, 26, 26, 26, 26, 26, 26,
+     26, 26, 26, 26, 26, 26, 26, 26,
+     26, 26, 26, 26, 26, 26, 26, 26,
+     26, 26, 26, 26, 26, 26, 26, 26,
+     26, 26, 26, 26, 26, 26, 26, 26
+  },
+  {
+     17, 18, 22, 31, 36, 36, 36, 36,
+     18, 20, 24, 34, 36, 36, 36, 36,
+     22, 24, 33, 36, 36, 36, 36, 36,
+     31, 34, 36, 36, 36, 36, 36, 36,
+     36, 36, 36, 36, 36, 36, 36, 36,
+     36, 36, 36, 36, 36, 36, 36, 36,
+     36, 36, 36, 36, 36, 36, 36, 36,
+     36, 36, 36, 36, 36, 36, 36, 36
+  },
+  {
+     17, 18, 24, 47, 99, 99, 99, 99,
+     18, 21, 26, 66, 99, 99, 99, 99,
+     24, 26, 56, 99, 99, 99, 99, 99,
+     47, 66, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99
+  }
+};
+static const th_quant_base OC_DEF_BASES_INTER[4]={
+  {
+     21, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21
+  },
+  {
+     18, 18, 18, 21, 23, 24, 25, 27,
+     18, 18, 21, 23, 24, 25, 27, 28,
+     18, 21, 23, 24, 25, 27, 28, 29,
+     21, 23, 24, 25, 27, 28, 29, 29,
+     23, 24, 25, 27, 28, 29, 29, 29,
+     24, 25, 27, 28, 29, 29, 29, 30,
+     25, 27, 28, 29, 29, 29, 30, 30,
+     27, 28, 29, 29, 29, 30, 30, 30
+  },
+  {
+     17, 17, 17, 20, 23, 26, 28, 32,
+     17, 17, 20, 23, 26, 28, 32, 34,
+     17, 20, 23, 26, 28, 32, 34, 37,
+     20, 23, 26, 28, 32, 34, 37, 37,
+     23, 26, 28, 32, 34, 37, 37, 37,
+     26, 28, 32, 34, 37, 37, 37, 41,
+     28, 32, 34, 37, 37, 37, 41, 42,
+     32, 34, 37, 37, 37, 41, 42, 42
+  },
+  {
+     16, 16, 16, 20, 24, 28, 32, 40,
+     16, 16, 20, 24, 28, 32, 40, 48,
+     16, 20, 24, 28, 32, 40, 48, 64,
+     20, 24, 28, 32, 40, 48, 64, 64,
+     24, 28, 32, 40, 48, 64, 64, 64,
+     28, 32, 40, 48, 64, 64, 64, 96,
+     32, 40, 48, 64, 64, 64, 96,128,
+     40, 48, 64, 64, 64, 96,128,128
+  }
+};
+/*The complete default quantization parameter set, laid out like
+   TH_VP31_QUANT_INFO.
+  The first two 64-entry tables are identical to each other, and the third
+   has the same values as the third table of TH_VP31_QUANT_INFO.
+  NOTE(review): the th_quant_info field names are not visible here;
+   presumably the tables are the DC scale, AC scale and loop filter limits --
+   confirm against theora/codec.h.*/
+const th_quant_info TH_DEF_QUANT_INFO={
+  {
+    365,348,333,316,300,287,277,265,
+    252,240,229,219,206,197,189,180,
+    171,168,160,153,146,139,132,127,
+    121,115,110,107,101, 97, 94, 89,
+     85, 83, 78, 73, 72, 67, 66, 62,
+     60, 59, 56, 53, 52, 48, 47, 43,
+     42, 40, 36, 35, 34, 33, 31, 30,
+     28, 25, 24, 22, 20, 17, 14, 10
+  },
+  {
+    365,348,333,316,300,287,277,265,
+    252,240,229,219,206,197,189,180,
+    171,168,160,153,146,139,132,127,
+    121,115,110,107,101, 97, 94, 89,
+     85, 83, 78, 73, 72, 67, 66, 62,
+     60, 59, 56, 53, 52, 48, 47, 43,
+     42, 40, 36, 35, 34, 33, 31, 30,
+     28, 25, 24, 22, 20, 17, 14, 10
+  },
+  {
+    30,25,20,20,15,15,14,14,
+    13,13,12,12,11,11,10,10,
+     9, 9, 8, 8, 7, 7, 7, 7,
+     6, 6, 6, 6, 5, 5, 5, 5,
+     4, 4, 4, 4, 3, 3, 3, 3,
+     2, 2, 2, 2, 2, 2, 2, 2,
+     0, 0, 0, 0, 0, 0, 0, 0,
+     0, 0, 0, 0, 0, 0, 0, 0
+  },
+  {
+    {
+      {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTRA_Y},
+      {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTRA_C},
+      {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTRA_C}
+    },
+    {
+      {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTER},
+      {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTER},
+      {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTER}
+    }
+  }
+};
+
+
+
+/*The Huffman codes used for macro block modes.
+  OC_MODE_BITS holds the length in bits and OC_MODE_CODES the bit pattern of
+   the code word for each mode rank.
+  oc_enc_mb_modes_pack() selects the codebook with scheme+1>>3, so codebook 1
+   is only used for scheme 7.
+  Only OC_MODE_CODES is private to this file; OC_MODE_BITS has external
+   linkage.*/
+
+const unsigned char OC_MODE_BITS[2][OC_NMODES]={
+  /*Codebook 0: a maximally skewed prefix code.*/
+  {1,2,3,4,5,6,7,7},
+  /*Codebook 1: a fixed-length code.*/
+  {3,3,3,3,3,3,3,3}
+};
+
+static const unsigned char OC_MODE_CODES[2][OC_NMODES]={
+  /*Codebook 0: a maximally skewed prefix code.*/
+  {0x00,0x02,0x06,0x0E,0x1E,0x3E,0x7E,0x7F},
+  /*Codebook 1: a fixed-length code.*/
+  {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07}
+};
+
+
+/*The Huffman codes used for motion vectors.
+  OC_MV_BITS holds the length in bits and OC_MV_CODES the bit pattern of the
+   code word for each MV component.
+  oc_enc_mv_pack() indexes both with the component value plus 31, so only 63
+   of the 64 entries in each codebook are explicitly initialized.
+  Only OC_MV_CODES is private to this file; OC_MV_BITS has external
+   linkage.*/
+
+const unsigned char OC_MV_BITS[2][64]={
+  /*Codebook 0: VLC code.*/
+  {
+      8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+    8,7,7,7,7,7,7,7,7,6,6,6,6,4,4,3,
+    3,
+    3,4,4,6,6,6,6,7,7,7,7,7,7,7,7,8,
+    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
+  },
+  /*Codebook 1: (5 bit magnitude, 1 bit sign).
+    This wastes a code word (0x01, negative zero), or a bit (0x00, positive
+     zero, requires only 5 bits to uniquely decode), but is hopefully not used
+     very often.*/
+  {
+      6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+    6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+    6,
+    6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+    6,6,6,6,6,6,6,6,6,6,6,6,6,6,6
+  }
+};
+
+static const unsigned char OC_MV_CODES[2][64]={
+  /*Codebook 0: VLC code.*/
+  {
+         0xFF,0xFD,0xFB,0xF9,0xF7,0xF5,0xF3,
+    0xF1,0xEF,0xED,0xEB,0xE9,0xE7,0xE5,0xE3,
+    0xE1,0x6F,0x6D,0x6B,0x69,0x67,0x65,0x63,
+    0x61,0x2F,0x2D,0x2B,0x29,0x09,0x07,0x02,
+    0x00,
+    0x01,0x06,0x08,0x28,0x2A,0x2C,0x2E,0x60,
+    0x62,0x64,0x66,0x68,0x6A,0x6C,0x6E,0xE0,
+    0xE2,0xE4,0xE6,0xE8,0xEA,0xEC,0xEE,0xF0,
+    0xF2,0xF4,0xF6,0xF8,0xFA,0xFC,0xFE
+  },
+  /*Codebook 1: (5 bit magnitude, 1 bit sign).*/
+  {
+         0x3F,0x3D,0x3B,0x39,0x37,0x35,0x33,
+    0x31,0x2F,0x2D,0x2B,0x29,0x27,0x25,0x23,
+    0x21,0x1F,0x1D,0x1B,0x19,0x17,0x15,0x13,
+    0x11,0x0F,0x0D,0x0B,0x09,0x07,0x05,0x03,
+    0x00,
+    0x02,0x04,0x06,0x08,0x0A,0x0C,0x0E,0x10,
+    0x12,0x14,0x16,0x18,0x1A,0x1C,0x1E,0x20,
+    0x22,0x24,0x26,0x28,0x2A,0x2C,0x2E,0x30,
+    0x32,0x34,0x36,0x38,0x3A,0x3C,0x3E
+  }
+};
+
+
+
+/*Super block run coding scheme:
+   Codeword             Run Length
+   0                       1
+   10x                     2-3
+   110x                    4-5
+   1110xx                  6-9
+   11110xxx                10-17
+   111110xxxx              18-33
+   111111xxxxxxxxxxxx      34-4129
+  The three tables below have one entry per row of this scheme:
+   OC_SB_RUN_VAL_MIN holds the smallest run length of each row (plus a final
+   entry one past the largest codable run), OC_SB_RUN_CODE_PREFIX the code
+   word with all of its x bits set to zero, and OC_SB_RUN_CODE_NBITS the
+   total code word length.*/
+const ogg_uint16_t    OC_SB_RUN_VAL_MIN[8]={1,2,4,6,10,18,34,4130};
+static const unsigned OC_SB_RUN_CODE_PREFIX[7]={
+  0,4,0xC,0x38,0xF0,0x3E0,0x3F000
+};
+const unsigned char   OC_SB_RUN_CODE_NBITS[7]={1,3,4,6,8,10,18};
+
+
+/*Emits the code word(s) for the length of one super block run into an
+   oggpack_buffer.
+  A run of 4129 or more is split into maximum-length code words, and an
+   explicit flag bit is written after each of them: the current flag if part
+   of the run remains, or the opposite flag if the run is exhausted and more
+   flags follow.
+  _opb:       The buffer to write to.
+  _run_count: The length of the run, which must be positive.
+  _flag:      The current flag.
+  _done:      Whether or not this is the last run (no more flags are to be
+               encoded).*/
+static void oc_sb_run_pack(oggpack_buffer *_opb,ptrdiff_t _run_count,
+ int _flag,int _done){
+  int ci;
+  while(_run_count>=4129){
+    /*0x3FFFF is the longest prefix with all of its offset bits set, i.e., a
+       run of exactly 4129.*/
+    oggpackB_write(_opb,0x3FFFF,18);
+    _run_count-=4129;
+    if(_run_count>0)oggpackB_write(_opb,_flag,1);
+    else{
+      if(!_done)oggpackB_write(_opb,!_flag,1);
+      /*The run was consumed entirely by maximum-length code words.*/
+      return;
+    }
+  }
+  /*Find the row of the coding scheme that contains the remaining length.*/
+  ci=0;
+  while(_run_count>=OC_SB_RUN_VAL_MIN[ci+1])ci++;
+  oggpackB_write(_opb,
+   OC_SB_RUN_CODE_PREFIX[ci]+_run_count-OC_SB_RUN_VAL_MIN[ci],
+   OC_SB_RUN_CODE_NBITS[ci]);
+}
+
+
+
+/*Block run coding scheme:
+   Codeword             Run Length
+   0x                      1-2
+   10x                     3-4
+   110x                    5-6
+   1110xx                  7-10
+   11110xx                 11-14
+   11111xxxx               15-30
+  Both tables below are fully expanded, with one entry per run length:
+   entry i holds the code word length (OC_BLOCK_RUN_CODE_NBITS) or the
+   complete code word (OC_BLOCK_RUN_CODE_PATTERN) for a run of length i+1.*/
+const unsigned char OC_BLOCK_RUN_CODE_NBITS[30]={
+  2,2,3,3,4,4,6,6,6,6,7,7,7,7,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9
+};
+static const ogg_uint16_t  OC_BLOCK_RUN_CODE_PATTERN[30]={
+        0x000,0x001,0x004,0x005,0x00C,0x00D,0x038,
+  0x039,0x03A,0x03B,0x078,0x079,0x07A,0x07B,0x1F0,
+  0x1F1,0x1F2,0x1F3,0x1F4,0x1F5,0x1F6,0x1F7,0x1F8,
+  0x1F9,0x1FA,0x1FB,0x1FC,0x1FD,0x1FE,0x1FF
+};
+
+
+/*Emits the code word for the length of one block run into an
+   oggpack_buffer.
+  The code word and its size come straight from OC_BLOCK_RUN_CODE_PATTERN and
+   OC_BLOCK_RUN_CODE_NBITS, which are indexed by the run length minus one.
+  _opb:       The buffer to write to.
+  _run_count: The length of the run.
+              This must be positive, and no more than 30.*/
+static void oc_block_run_pack(oggpack_buffer *_opb,int _run_count){
+  int idx;
+  idx=_run_count-1;
+  oggpackB_write(_opb,OC_BLOCK_RUN_CODE_PATTERN[idx],
+   OC_BLOCK_RUN_CODE_NBITS[idx]);
+}
+
+
+
+/*Writes the frame header of a data packet: the packet type bit, the frame
+   type, the list of qi values, and, for key frames only, three zero bits.*/
+static void oc_enc_frame_header_pack(oc_enc_ctx *_enc){
+  oggpack_buffer *opb;
+  int             qii;
+  opb=&_enc->opb;
+  /*Mark this as a data packet.*/
+  oggpackB_write(opb,0,1);
+  /*Output the frame type (key frame or delta frame).*/
+  oggpackB_write(opb,_enc->state.frame_type,1);
+  /*Write out the current qi list.
+    The first entry is always present; each additional one is preceded by a
+     1 bit, and a 0 bit ends the list unless all three entries are used.*/
+  oggpackB_write(opb,_enc->state.qis[0],6);
+  for(qii=1;qii<3&&_enc->state.nqis>qii;qii++){
+    oggpackB_write(opb,1,1);
+    oggpackB_write(opb,_enc->state.qis[qii],6);
+  }
+  if(qii<3)oggpackB_write(opb,0,1);
+  /*Key frames have 3 unused configuration bits, holdovers from the VP3 days.
+    Most of the other unused bits in the VP3 headers were eliminated.
+    Monty kept these to leave us some wiggle room for future expansion,
+     though a single bit in all frames would have been far more useful.*/
+  if(_enc->state.frame_type==OC_INTRA_FRAME)oggpackB_write(opb,0,3);
+}
+
+/*Writes the flags that mark which super blocks are partially coded.
+  The value of the first flag is written explicitly; after that the flags are
+   run-length encoded, with the flag value alternating between each run.
+  Return: The number of partially coded SBs.*/
+static unsigned oc_enc_partial_sb_flags_pack(oc_enc_ctx *_enc){
+  const oc_sb_flags *sb_flags;
+  unsigned           nsbs;
+  unsigned           sbi;
+  unsigned           npartial;
+  int                flag;
+  sb_flags=_enc->state.sb_flags;
+  nsbs=_enc->state.nsbs;
+  flag=sb_flags[0].coded_partially;
+  oggpackB_write(&_enc->opb,flag,1);
+  npartial=0;
+  sbi=0;
+  do{
+    unsigned run_start;
+    /*Advance to the first super block whose flag differs.*/
+    run_start=sbi;
+    while(sbi<nsbs&&sb_flags[sbi].coded_partially==flag)sbi++;
+    /*Only runs of set flags contribute to the partial count.*/
+    npartial+=flag*(sbi-run_start);
+    oc_sb_run_pack(&_enc->opb,sbi-run_start,flag,sbi>=nsbs);
+    flag=!flag;
+  }
+  while(sbi<nsbs);
+  return npartial;
+}
+
+/*Writes the coded/not coded flags for each super block that is not partially
+   coded.
+  The value of the first flag is written explicitly; after that the flags are
+   run-length encoded, with the flag value alternating between each run.
+  At least one super block must not be partially coded; the initial search
+   below has no other bound.*/
+static void oc_enc_coded_sb_flags_pack(oc_enc_ctx *_enc){
+  const oc_sb_flags *sb_flags;
+  unsigned           nsbs;
+  unsigned           sbi;
+  int                flag;
+  sb_flags=_enc->state.sb_flags;
+  nsbs=_enc->state.nsbs;
+  /*Skip partially coded super blocks; their flags have already been coded.*/
+  sbi=0;
+  while(sb_flags[sbi].coded_partially)sbi++;
+  flag=sb_flags[sbi].coded_fully;
+  oggpackB_write(&_enc->opb,flag,1);
+  do{
+    unsigned run_count;
+    run_count=0;
+    while(sbi<nsbs){
+      /*Partially coded super blocks neither extend nor end a run.*/
+      if(!sb_flags[sbi].coded_partially){
+        if(sb_flags[sbi].coded_fully!=flag)break;
+        run_count++;
+      }
+      sbi++;
+    }
+    oc_sb_run_pack(&_enc->opb,run_count,flag,sbi>=nsbs);
+    flag=!flag;
+  }
+  while(sbi<nsbs);
+}
+
+/*Writes all of the coded block flags for the current frame.
+  The partially coded super block flags go first, followed by the fully
+   coded/not coded flags if at least one super block is not partially coded.
+  If any super block is partially coded, the coded flag of every block in
+   each such super block is then written as a series of block runs, with the
+   flag value alternating between each run.*/
+static void oc_enc_coded_flags_pack(oc_enc_ctx *_enc){
+  const oc_sb_map   *sb_maps;
+  const oc_sb_flags *sb_flags;
+  const oc_fragment *frags;
+  unsigned           nsbs;
+  unsigned           npartial;
+  unsigned           sbi;
+  int                run_count;
+  int                flag;
+  int                pli;
+  npartial=oc_enc_partial_sb_flags_pack(_enc);
+  if(npartial<_enc->state.nsbs)oc_enc_coded_sb_flags_pack(_enc);
+  sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
+  sb_flags=_enc->state.sb_flags;
+  frags=_enc->state.frags;
+  nsbs=_enc->state.nsbs;
+  /*Look for the first partially coded super block.*/
+  sbi=0;
+  while(sbi<nsbs&&!sb_flags[sbi].coded_partially)sbi++;
+  /*Individual coded block flags are only stored if there's at least one
+     partial SB.*/
+  if(sbi>=nsbs)return;
+  /*The first run starts with the flag of the first block of that SB.*/
+  flag=frags[sb_maps[sbi][0][0]].coded;
+  oggpackB_write(&_enc->opb,flag,1);
+  run_count=0;
+  nsbs=sbi=0;
+  for(pli=0;pli<3;pli++){
+    nsbs+=_enc->state.fplanes[pli].nsbs;
+    for(;sbi<nsbs;sbi++){
+      int quadi;
+      int bi;
+      if(!sb_flags[sbi].coded_partially)continue;
+      for(quadi=0;quadi<4;quadi++){
+        for(bi=0;bi<4;bi++){
+          ptrdiff_t fragi;
+          fragi=sb_maps[sbi][quadi][bi];
+          /*Entries with a negative fragment index are skipped.*/
+          if(fragi<0)continue;
+          if(frags[fragi].coded==flag)run_count++;
+          else{
+            /*The flag changed: emit the finished run and start a new one
+               with this block.*/
+            oc_block_run_pack(&_enc->opb,run_count);
+            flag=!flag;
+            run_count=1;
+          }
+        }
+      }
+    }
+  }
+  /*Flush any trailing block coded run.*/
+  if(run_count>0)oc_block_run_pack(&_enc->opb,run_count);
+}
+
+/*Writes the macro block mode coding scheme, followed by the mode of every
+   coded macro block, in coded order.
+  Each mode is translated to its rank under the chosen scheme, and the rank
+   is written using the codebook that scheme selects.*/
+static void oc_enc_mb_modes_pack(oc_enc_ctx *_enc){
+  const unsigned char *mode_ranks;
+  const unsigned char *mode_bits;
+  const unsigned char *mode_codes;
+  const signed char   *mb_modes;
+  unsigned            *coded_mbis;
+  size_t               ncoded_mbis;
+  unsigned             mbii;
+  int                  scheme;
+  int                  codebook;
+  int                  mb_mode;
+  /*Encode the best scheme.*/
+  scheme=_enc->chooser.scheme_list[0];
+  oggpackB_write(&_enc->opb,scheme,3);
+  /*If the chosen scheme is scheme 0, send the mode frequency ordering.*/
+  if(scheme==0){
+    mb_mode=0;
+    while(mb_mode<OC_NMODES){
+      oggpackB_write(&_enc->opb,_enc->chooser.scheme0_ranks[mb_mode],3);
+      mb_mode++;
+    }
+  }
+  /*This is 1 for scheme 7 and 0 for every other scheme.*/
+  codebook=scheme+1>>3;
+  mode_codes=OC_MODE_CODES[codebook];
+  mode_bits=OC_MODE_BITS[codebook];
+  mode_ranks=_enc->chooser.mode_ranks[scheme];
+  mb_modes=_enc->state.mb_modes;
+  coded_mbis=_enc->coded_mbis;
+  ncoded_mbis=_enc->ncoded_mbis;
+  mbii=0;
+  while(mbii<ncoded_mbis){
+    int rank;
+    rank=mode_ranks[mb_modes[coded_mbis[mbii]]];
+    oggpackB_write(&_enc->opb,mode_codes[rank],mode_bits[rank]);
+    mbii++;
+  }
+}
+
+/*Writes a single motion vector, X component first.
+  _mv_scheme: The MV codebook to use (0 or 1).
+  _dx:        The X component; it is offset by 31 to index the code tables.
+  _dy:        The Y component; it is offset by 31 to index the code tables.*/
+static void oc_enc_mv_pack(oc_enc_ctx *_enc,int _mv_scheme,int _dx,int _dy){
+  const unsigned char *mv_codes;
+  const unsigned char *mv_bits;
+  mv_codes=OC_MV_CODES[_mv_scheme];
+  mv_bits=OC_MV_BITS[_mv_scheme];
+  oggpackB_write(&_enc->opb,mv_codes[_dx+31],mv_bits[_dx+31]);
+  oggpackB_write(&_enc->opb,mv_codes[_dy+31],mv_bits[_dy+31]);
+}
+
+/*Writes the MV coding scheme, followed by the motion vectors of every coded
+   macro block whose mode carries explicit MVs.
+  OC_MODE_INTER_MV and OC_MODE_GOLDEN_MV macro blocks store one MV, taken
+   from their first coded luma block; OC_MODE_INTER_MV_FOUR macro blocks
+   store one MV for each coded luma block.*/
+static void oc_enc_mvs_pack(oc_enc_ctx *_enc){
+  const unsigned     *coded_mbis;
+  size_t              ncoded_mbis;
+  const oc_mb_map    *mb_maps;
+  const signed char  *mb_modes;
+  const oc_fragment  *frags;
+  const oc_mv        *frag_mvs;
+  unsigned            mbii;
+  int                 mv_scheme;
+  /*Choose the coding scheme: scheme 1 is used only if it takes strictly
+     fewer bits than scheme 0.*/
+  mv_scheme=_enc->mv_bits[1]<_enc->mv_bits[0];
+  oggpackB_write(&_enc->opb,mv_scheme,1);
+  /*Encode the motion vectors.
+    Macro blocks are iterated in Hilbert scan order, but the MVs within the
+     macro block are coded in raster order.*/
+  coded_mbis=_enc->coded_mbis;
+  ncoded_mbis=_enc->ncoded_mbis;
+  mb_modes=_enc->state.mb_modes;
+  mb_maps=(const oc_mb_map *)_enc->state.mb_maps;
+  frags=_enc->state.frags;
+  frag_mvs=(const oc_mv *)_enc->state.frag_mvs;
+  for(mbii=0;mbii<ncoded_mbis;mbii++){
+    ptrdiff_t fragi;
+    unsigned  mbi;
+    int       mb_mode;
+    int       bi;
+    mbi=coded_mbis[mbii];
+    mb_mode=mb_modes[mbi];
+    if(mb_mode==OC_MODE_INTER_MV_FOUR){
+      /*Code an MV for every coded luma block in this macro block.*/
+      for(bi=0;bi<4;bi++){
+        fragi=mb_maps[mbi][0][bi];
+        if(!frags[fragi].coded)continue;
+        oc_enc_mv_pack(_enc,mv_scheme,
+         frag_mvs[fragi][0],frag_mvs[fragi][1]);
+      }
+    }
+    else if(mb_mode==OC_MODE_INTER_MV||mb_mode==OC_MODE_GOLDEN_MV){
+      /*Only code a single MV for this macro block.
+        This search has no upper bound, so it relies on the macro block
+         having a coded luma block.*/
+      bi=0;
+      while(!frags[mb_maps[mbi][0][bi]].coded)bi++;
+      fragi=mb_maps[mbi][0][bi];
+      oc_enc_mv_pack(_enc,mv_scheme,
+       frag_mvs[fragi][0],frag_mvs[fragi][1]);
+    }
+  }
+}
+
+/*Writes the per-fragment quantizer index (qii) selections to the packet as
+   run lengths.
+  Nothing is written when the frame uses at most one qi value or when there
+   are no coded fragments.
+  The first pass codes, over all coded fragments, runs of the flag
+   "qii is non-zero".
+  The second pass runs only when there are at least 3 qi values and at least
+   one fragment has a non-zero qii; it codes runs of (qii-1) over just the
+   fragments whose qii is non-zero.*/
+static void oc_enc_block_qis_pack(oc_enc_ctx *_enc){
+  const oc_fragment *frags;
+  ptrdiff_t         *coded_fragis;
+  ptrdiff_t          ncoded_fragis;
+  ptrdiff_t          fragii;
+  ptrdiff_t          run_count;
+  ptrdiff_t          nqi0;
+  int                flag;
+  if(_enc->state.nqis<=1)return;
+  ncoded_fragis=_enc->state.ntotal_coded_fragis;
+  if(ncoded_fragis<=0)return;
+  coded_fragis=_enc->state.coded_fragis;
+  frags=_enc->state.frags;
+  /*The initial flag value is written explicitly; after that it simply
+     alternates from run to run.*/
+  flag=!!frags[coded_fragis[0]].qii;
+  oggpackB_write(&_enc->opb,flag,1);
+  /*nqi0 counts the fragments with qii==0.*/
+  nqi0=0;
+  for(fragii=0;fragii<ncoded_fragis;){
+    for(run_count=0;fragii<ncoded_fragis;fragii++){
+      if(!!frags[coded_fragis[fragii]].qii!=flag)break;
+      run_count++;
+      nqi0+=!flag;
+    }
+    /*The last argument is non-zero when this run reached the end of the
+       list.*/
+    oc_sb_run_pack(&_enc->opb,run_count,flag,fragii>=ncoded_fragis);
+    flag=!flag;
+  }
+  /*A second pass is only needed to tell qii 1 from qii 2, and only if some
+     fragment actually has a non-zero qii.*/
+  if(_enc->state.nqis<3||nqi0>=ncoded_fragis)return;
+  /*Skip ahead to the first fragment with a non-zero qii.
+    One must exist, since nqi0<ncoded_fragis here.*/
+  for(fragii=0;!frags[coded_fragis[fragii]].qii;fragii++);
+  flag=frags[coded_fragis[fragii]].qii-1;
+  oggpackB_write(&_enc->opb,flag,1);
+  while(fragii<ncoded_fragis){
+    for(run_count=0;fragii<ncoded_fragis;fragii++){
+      int qii;
+      qii=frags[coded_fragis[fragii]].qii;
+      /*Fragments with qii==0 do not take part in this pass.*/
+      if(!qii)continue;
+      if(qii-1!=flag)break;
+      run_count++;
+    }
+    oc_sb_run_pack(&_enc->opb,run_count,flag,fragii>=ncoded_fragis);
+    flag=!flag;
+  }
+}
+
+/*Builds histograms of the DCT tokens queued for a range of zig-zag indices.
+  Both output arrays are cleared first.
+  Tokens before the per-list offset in _enc->dct_token_offs are not counted.
+  _zzi_start:      The first zig-zag index to include.
+  _zzi_end:        One past the last zig-zag index to include.
+  _token_counts_y: Receives the counts for plane 0 (Y').
+  _token_counts_c: Receives the combined counts for planes 1 and 2 (Cb and
+                    Cr).*/
+static void oc_enc_count_tokens(oc_enc_ctx *_enc,int _zzi_start,int _zzi_end,
+ ptrdiff_t _token_counts_y[32],ptrdiff_t _token_counts_c[32]){
+  int pli;
+  memset(_token_counts_y,0,32*sizeof(*_token_counts_y));
+  memset(_token_counts_c,0,32*sizeof(*_token_counts_c));
+  for(pli=0;pli<3;pli++){
+    ptrdiff_t *counts;
+    int        zzi;
+    /*Plane 0 has its own histogram; the other two planes share one.*/
+    counts=pli==0?_token_counts_y:_token_counts_c;
+    for(zzi=_zzi_start;zzi<_zzi_end;zzi++){
+      const unsigned char *tokens;
+      ptrdiff_t            ntokens;
+      ptrdiff_t            ti;
+      tokens=_enc->dct_tokens[pli][zzi];
+      ntokens=_enc->ndct_tokens[pli][zzi];
+      for(ti=_enc->dct_token_offs[pli][zzi];ti<ntokens;ti++){
+        counts[tokens[ti]]++;
+      }
+    }
+  }
+}
+
+/*Accumulates, for each of the 16 candidate Huffman tables in group _hgi, the
+   number of bits needed to code the given token histogram.
+  The totals are added to whatever _bit_counts already holds, so the caller
+   must clear it before the first call.
+  _hgi:          The Huffman group; its tables start at index _hgi*16.
+  _token_counts: The number of occurrences of each of the 32 tokens.
+  _bit_counts:   The running bit totals, one per candidate table.*/
+static void oc_enc_count_bits(oc_enc_ctx *_enc,int _hgi,
+ const ptrdiff_t _token_counts[32],size_t _bit_counts[16]){
+  int first_table;
+  int ti;
+  first_table=_hgi<<4;
+  for(ti=0;ti<16;ti++){
+    int tok;
+    for(tok=0;tok<32;tok++){
+      _bit_counts[ti]+=
+       _token_counts[tok]*_enc->huff_codes[first_table+ti][tok].nbits;
+    }
+  }
+}
+
+/*Returns the index (0...15) of the smallest entry in _bit_counts.
+  When several entries tie for the minimum, the lowest index wins.*/
+static int oc_select_huff_idx(size_t _bit_counts[16]){
+  size_t min_bits;
+  int    best;
+  int    i;
+  best=0;
+  min_bits=_bit_counts[0];
+  for(i=1;i<16;i++){
+    if(_bit_counts[i]<min_bits){
+      min_bits=_bit_counts[i];
+      best=i;
+    }
+  }
+  return best;
+}
+
+/*Writes the queued DCT tokens (and their extra bits) for a range of zig-zag
+   indices to the packet.
+  For each zig-zag index, the tokens of plane 0 are written first, then those
+   of plane 1, then those of plane 2.
+  _zzi_start: The first zig-zag index to write.
+  _zzi_end:   One past the last zig-zag index to write.
+  _huff_idxs: The indices into _enc->huff_codes of the tables to use:
+               [0] for plane 0 and [1] for planes 1 and 2.*/
+static void oc_enc_huff_group_pack(oc_enc_ctx *_enc,
+ int _zzi_start,int _zzi_end,const int _huff_idxs[2]){
+  int zzi;
+  for(zzi=_zzi_start;zzi<_zzi_end;zzi++){
+    int pli;
+    for(pli=0;pli<3;pli++){
+      const unsigned char *dct_tokens;
+      const ogg_uint16_t  *extra_bits;
+      ptrdiff_t            ndct_tokens;
+      const th_huff_code  *huff_codes;
+      ptrdiff_t            ti;
+      dct_tokens=_enc->dct_tokens[pli][zzi];
+      extra_bits=_enc->extra_bits[pli][zzi];
+      ndct_tokens=_enc->ndct_tokens[pli][zzi];
+      /*pli+1>>1 parses as (pli+1)>>1: 0 for plane 0, 1 for planes 1 and 2.*/
+      huff_codes=_enc->huff_codes[_huff_idxs[pli+1>>1]];
+      for(ti=_enc->dct_token_offs[pli][zzi];ti<ndct_tokens;ti++){
+        int token;
+        int neb;
+        token=dct_tokens[ti];
+        oggpackB_write(&_enc->opb,huff_codes[token].pattern,
+         huff_codes[token].nbits);
+        /*Extra bits are only written for tokens that have any.*/
+        neb=OC_DCT_TOKEN_EXTRA_BITS[token];
+        if(neb)oggpackB_write(&_enc->opb,extra_bits[ti],neb);
+      }
+    }
+  }
+}
+
+/*Selects Huffman tables for the DC and AC token lists and writes all of the
+   DCT tokens to the packet.
+  For the DC list (zig-zag index 0) and then for the AC lists (zig-zag indices
+   1...63), one 4-bit table index is chosen and written for plane 0 and one
+   for planes 1 and 2, by picking the candidate with the smallest total bit
+   count.
+  The chosen indices are also recorded in _enc->huff_idxs for the current
+   frame type.*/
+static void oc_enc_residual_tokens_pack(oc_enc_ctx *_enc){
+  /*Zig-zag index range [OC_HUFF_GROUP_MIN[hgi],OC_HUFF_GROUP_MAX[hgi]) belongs
+     to Huffman group hgi; the MAX table is the MIN table shifted by one.*/
+  static const unsigned char  OC_HUFF_GROUP_MIN[6]={0,1,6,15,28,64};
+  static const unsigned char *OC_HUFF_GROUP_MAX=OC_HUFF_GROUP_MIN+1;
+  ptrdiff_t token_counts_y[32];
+  ptrdiff_t token_counts_c[32];
+  size_t    bits_y[16];
+  size_t    bits_c[16];
+  int       huff_idxs[2];
+  int       frame_type;
+  int       hgi;
+  frame_type=_enc->state.frame_type;
+  /*Choose which Huffman tables to use for the DC token list.*/
+  oc_enc_count_tokens(_enc,0,1,token_counts_y,token_counts_c);
+  memset(bits_y,0,sizeof(bits_y));
+  memset(bits_c,0,sizeof(bits_c));
+  oc_enc_count_bits(_enc,0,token_counts_y,bits_y);
+  oc_enc_count_bits(_enc,0,token_counts_c,bits_c);
+  huff_idxs[0]=oc_select_huff_idx(bits_y);
+  huff_idxs[1]=oc_select_huff_idx(bits_c);
+  /*Write the DC token list with the chosen tables.*/
+  oggpackB_write(&_enc->opb,huff_idxs[0],4);
+  oggpackB_write(&_enc->opb,huff_idxs[1],4);
+  _enc->huff_idxs[frame_type][0][0]=(unsigned char)huff_idxs[0];
+  _enc->huff_idxs[frame_type][0][1]=(unsigned char)huff_idxs[1];
+  oc_enc_huff_group_pack(_enc,0,1,huff_idxs);
+  /*Choose which Huffman tables to use for the AC token lists.*/
+  memset(bits_y,0,sizeof(bits_y));
+  memset(bits_c,0,sizeof(bits_c));
+  /*The bit counts are summed over all four AC groups, so a single index is
+     selected for all of them.*/
+  for(hgi=1;hgi<5;hgi++){
+    oc_enc_count_tokens(_enc,OC_HUFF_GROUP_MIN[hgi],OC_HUFF_GROUP_MAX[hgi],
+     token_counts_y,token_counts_c);
+    oc_enc_count_bits(_enc,hgi,token_counts_y,bits_y);
+    oc_enc_count_bits(_enc,hgi,token_counts_c,bits_c);
+  }
+  huff_idxs[0]=oc_select_huff_idx(bits_y);
+  huff_idxs[1]=oc_select_huff_idx(bits_c);
+  /*Write the AC token lists using the chosen tables.*/
+  oggpackB_write(&_enc->opb,huff_idxs[0],4);
+  oggpackB_write(&_enc->opb,huff_idxs[1],4);
+  _enc->huff_idxs[frame_type][1][0]=(unsigned char)huff_idxs[0];
+  _enc->huff_idxs[frame_type][1][1]=(unsigned char)huff_idxs[1];
+  for(hgi=1;hgi<5;hgi++){
+    /*Advance by 16 on each pass so that group hgi uses table
+       16*hgi+(selected index).*/
+    huff_idxs[0]+=16;
+    huff_idxs[1]+=16;
+    oc_enc_huff_group_pack(_enc,
+     OC_HUFF_GROUP_MIN[hgi],OC_HUFF_GROUP_MAX[hgi],huff_idxs);
+  }
+}
+
+/*Assembles the packet for the current frame in _enc->opb and marks it ready.
+  When no fragments were coded the packet is left at 0 bytes, which drops the
+   frame.*/
+static void oc_enc_frame_pack(oc_enc_ctx *_enc){
+  int have_coded_frags;
+  oggpackB_reset(&_enc->opb);
+  have_coded_frags=_enc->state.ntotal_coded_fragis>0;
+  if(have_coded_frags){
+    oc_enc_frame_header_pack(_enc);
+    /*Delta frames additionally carry coded block flags, MB modes, and MVs.*/
+    if(_enc->state.frame_type==OC_INTER_FRAME){
+      oc_enc_coded_flags_pack(_enc);
+      oc_enc_mb_modes_pack(_enc);
+      oc_enc_mvs_pack(_enc);
+    }
+    oc_enc_block_qis_pack(_enc);
+    oc_enc_tokenize_finish(_enc);
+    oc_enc_residual_tokens_pack(_enc);
+  }
+  /*The packet can now be flushed.*/
+  _enc->packet_state=OC_PACKET_READY;
+#if defined(OC_COLLECT_METRICS)
+  oc_enc_mode_metrics_collect(_enc);
+#endif
+}
+
+
+/*Fills _enc->opt_vtable with the portable C implementations of the encoder's
+   optimizable primitives.*/
+void oc_enc_vtable_init_c(oc_enc_ctx *_enc){
+  /*The implementations prefixed with oc_enc_ are encoder-specific.
+    The rest we re-use from the decoder.*/
+  _enc->opt_vtable.frag_sad=oc_enc_frag_sad_c;
+  _enc->opt_vtable.frag_sad_thresh=oc_enc_frag_sad_thresh_c;
+  _enc->opt_vtable.frag_sad2_thresh=oc_enc_frag_sad2_thresh_c;
+  _enc->opt_vtable.frag_satd_thresh=oc_enc_frag_satd_thresh_c;
+  _enc->opt_vtable.frag_satd2_thresh=oc_enc_frag_satd2_thresh_c;
+  _enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_c;
+  _enc->opt_vtable.frag_sub=oc_enc_frag_sub_c;
+  _enc->opt_vtable.frag_sub_128=oc_enc_frag_sub_128_c;
+  _enc->opt_vtable.frag_copy2=oc_enc_frag_copy2_c;
+  _enc->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c;
+  _enc->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c;
+  _enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_c;
+}
+
+/*Initialize the macro block neighbor lists for MC analysis.
+  This assumes that the entire mb_info memory region has been initialized with
+   zeros.
+  For every macro block whose mode is not OC_MODE_INVALID, this appends to
+   cneighbors/pneighbors the indices of the neighboring macro blocks that lie
+   inside the frame and are themselves not OC_MODE_INVALID.*/
+static void oc_enc_mb_info_init(oc_enc_ctx *_enc){
+  oc_mb_enc_info    *embs;
+  const signed char *mb_modes;
+  unsigned           nhsbs;
+  unsigned           nvsbs;
+  unsigned           nhmbs;
+  unsigned           nvmbs;
+  unsigned           sby;
+  mb_modes=_enc->state.mb_modes;
+  embs=_enc->mb_info;
+  nhsbs=_enc->state.fplanes[0].nhsbs;
+  nvsbs=_enc->state.fplanes[0].nvsbs;
+  nhmbs=_enc->state.nhmbs;
+  nvmbs=_enc->state.nvmbs;
+  for(sby=0;sby<nvsbs;sby++){
+    unsigned sbx;
+    for(sbx=0;sbx<nhsbs;sbx++){
+      int quadi;
+      for(quadi=0;quadi<4;quadi++){
+        /*Because of the Hilbert curve ordering the macro blocks are
+           visited in, the available neighbors change depending on where in
+           a super block the macro block is located.
+          Only the first three vectors are used in the median calculation
+           for the optimal predictor, and so the most important should be
+           listed first.
+          Additional vectors are used, so there will always be at least 3,
+           except for in the upper-left most macro block.*/
+        /*The number of current neighbors for each macro block position.*/
+        static const unsigned char NCNEIGHBORS[4]={4,3,2,4};
+        /*The offset of each current neighbor in the X direction.*/
+        static const signed char   CDX[4][4]={
+          {-1,0,1,-1},
+          {-1,0,-1,},
+          {-1,-1},
+          {-1,0,0,1}
+        };
+        /*The offset of each current neighbor in the Y direction.*/
+        static const signed char   CDY[4][4]={
+          {0,-1,-1,-1},
+          {0,-1,-1},
+          {0,-1},
+          {0,-1,1,-1}
+        };
+        /*The offset of each previous neighbor in the X direction.*/
+        static const signed char   PDX[4]={-1,0,1,0};
+        /*The offset of each previous neighbor in the Y direction.*/
+        static const signed char   PDY[4]={0,-1,0,1};
+        unsigned mbi;
+        int      mbx;
+        int      mby;
+        unsigned nmbi;
+        int      nmbx;
+        int      nmby;
+        int      ni;
+        /*This parses as ((sby*nhsbs+sbx)<<2)+quadi: four macro blocks per
+           super block, stored consecutively.*/
+        mbi=(sby*nhsbs+sbx<<2)+quadi;
+        if(mb_modes[mbi]==OC_MODE_INVALID)continue;
+        /*Convert the position within the super block to macro block
+           coordinates.
+          quadi 0,1,2,3 maps to (x,y) offsets (0,0),(0,1),(1,1),(1,0).*/
+        mbx=2*sbx+(quadi>>1);
+        mby=2*sby+(quadi+1>>1&1);
+        /*Fill in the neighbors with current motion vectors available.*/
+        for(ni=0;ni<NCNEIGHBORS[quadi];ni++){
+          nmbx=mbx+CDX[quadi][ni];
+          nmby=mby+CDY[quadi][ni];
+          /*Skip neighbors that fall outside the frame.*/
+          if(nmbx<0||nmbx>=nhmbs||nmby<0||nmby>=nvmbs)continue;
+          /*Convert the neighbor's coordinates back to a macro block index.*/
+          nmbi=(nmby&~1)*nhmbs+((nmbx&~1)<<1)+OC_MB_MAP[nmby&1][nmbx&1];
+          if(mb_modes[nmbi]==OC_MODE_INVALID)continue;
+          embs[mbi].cneighbors[embs[mbi].ncneighbors++]=nmbi;
+        }
+        /*Fill in the neighbors with previous motion vectors available.*/
+        for(ni=0;ni<4;ni++){
+          nmbx=mbx+PDX[ni];
+          nmby=mby+PDY[ni];
+          if(nmbx<0||nmbx>=nhmbs||nmby<0||nmby>=nvmbs)continue;
+          nmbi=(nmby&~1)*nhmbs+((nmbx&~1)<<1)+OC_MB_MAP[nmby&1][nmbx&1];
+          if(mb_modes[nmbi]==OC_MODE_INVALID)continue;
+          embs[mbi].pneighbors[embs[mbi].npneighbors++]=nmbi;
+        }
+      }
+    }
+  }
+}
+
+/*Installs the Huffman codes the encoder will use.
+  The codes are validated by packing them into _enc->opb (which is reset
+   first); they are only copied into the encoder if that succeeds.
+  _codes: The codes to use, or NULL to select TH_VP31_HUFF_CODES.
+  Return: 0 on success, TH_EFAULT if _enc is NULL, TH_EINVAL if the packet
+           state is already past OC_PACKET_SETUP_HDR, or the negative value
+           returned by oc_huff_codes_pack().*/
+static int oc_enc_set_huffman_codes(oc_enc_ctx *_enc,
+ const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]){
+  int err;
+  if(_enc==NULL)return TH_EFAULT;
+  if(_enc->packet_state>OC_PACKET_SETUP_HDR)return TH_EINVAL;
+  if(_codes==NULL)_codes=TH_VP31_HUFF_CODES;
+  oggpackB_reset(&_enc->opb);
+  err=oc_huff_codes_pack(&_enc->opb,_codes);
+  if(err>=0){
+    memcpy(_enc->huff_codes,_codes,sizeof(_enc->huff_codes));
+    err=0;
+  }
+  return err;
+}
+
+/*Installs the quantization parameters the encoder will use and rebuilds all
+   of the tables derived from them.
+  This may only be called before the setup header is written.
+  If it is called multiple times, only the last call has any effect.
+  _qinfo: The quantization parameters.
+          These are described in more detail in theoraenc.h.
+          This can be NULL, in which case the default quantization parameters
+           will be used.
+  Return: 0 on success, TH_EFAULT if _enc is NULL, or TH_EINVAL if the packet
+           state is already past OC_PACKET_SETUP_HDR.*/
+static int oc_enc_set_quant_params(oc_enc_ctx *_enc,
+ const th_quant_info *_qinfo){
+  int qi;
+  if(_enc==NULL)return TH_EFAULT;
+  if(_enc->packet_state>OC_PACKET_SETUP_HDR)return TH_EINVAL;
+  if(_qinfo==NULL)_qinfo=&TH_DEF_QUANT_INFO;
+  /*TODO: Analyze for packing purposes instead of just doing a shallow copy.*/
+  memcpy(&_enc->qinfo,_qinfo,sizeof(_enc->qinfo));
+  /*Point every table entry at its backing storage.*/
+  for(qi=0;qi<64;qi++){
+    int pli;
+    for(pli=0;pli<3;pli++){
+      int qti;
+      for(qti=0;qti<2;qti++){
+        _enc->state.dequant_tables[qi][pli][qti]=
+         _enc->state.dequant_table_data[qi][pli][qti];
+        _enc->enquant_tables[qi][pli][qti]=
+         _enc->enquant_table_data[qi][pli][qti];
+      }
+    }
+  }
+  oc_enquant_tables_init(_enc->state.dequant_tables,
+   _enc->enquant_tables,_qinfo);
+  memcpy(_enc->state.loop_filter_limits,_qinfo->loop_filter_limits,
+   sizeof(_enc->state.loop_filter_limits));
+  oc_enquant_qavg_init(_enc->log_qavg,_enc->state.dequant_tables,
+   _enc->state.info.pixel_fmt);
+  return 0;
+}
+
+static void oc_enc_clear(oc_enc_ctx *_enc);
+
+/*Initializes an encoder context from the requested stream parameters.
+  _enc:   The (uninitialized) context to set up.
+  _info:  The requested settings; a sanitized copy is used, so the caller's
+           structure is not modified.
+  Return: 0 on success, the negative value returned by oc_state_init() if it
+           fails, or TH_EFAULT if any allocation fails (in which case
+           oc_enc_clear() has already been called on _enc).*/
+static int oc_enc_init(oc_enc_ctx *_enc,const th_info *_info){
+  th_info   info;
+  size_t    mcu_nmbs;
+  ptrdiff_t mcu_nfrags;
+  int       hdec;
+  int       vdec;
+  int       ret;
+  int       pli;
+  /*Clean up the requested settings.*/
+  memcpy(&info,_info,sizeof(info));
+  info.version_major=TH_VERSION_MAJOR;
+  info.version_minor=TH_VERSION_MINOR;
+  info.version_subminor=TH_VERSION_SUB;
+  /*Qualities above 63 are clamped; negative qualities fall back to 32.*/
+  if(info.quality>63)info.quality=63;
+  if(info.quality<0)info.quality=32;
+  if(info.target_bitrate<0)info.target_bitrate=0;
+  /*Initialize the shared encoder/decoder state.*/
+  ret=oc_state_init(&_enc->state,&info,4);
+  if(ret<0)return ret;
+  /*None of the allocations below are checked immediately; they are all
+     tested together further down.*/
+  _enc->mb_info=_ogg_calloc(_enc->state.nmbs,sizeof(*_enc->mb_info));
+  _enc->frag_dc=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_dc));
+  _enc->coded_mbis=
+   (unsigned *)_ogg_malloc(_enc->state.nmbs*sizeof(*_enc->coded_mbis));
+  /*hdec is 1 when bit 0 of pixel_fmt is clear, and vdec is 1 when bit 1 is
+     clear.*/
+  hdec=!(_enc->state.info.pixel_fmt&1);
+  vdec=!(_enc->state.info.pixel_fmt&2);
+  /*If chroma is sub-sampled in the vertical direction, we have to encode two
+     super block rows of Y' for each super block row of Cb and Cr.*/
+  _enc->mcu_nvsbs=1<<vdec;
+  mcu_nmbs=_enc->mcu_nvsbs*_enc->state.fplanes[0].nhsbs*(size_t)4;
+  /*The shift parses as 8*mcu_nmbs>>(hdec+vdec).*/
+  mcu_nfrags=4*mcu_nmbs+(8*mcu_nmbs>>hdec+vdec);
+  _enc->mcu_skip_ssd=(unsigned *)_ogg_malloc(
+   mcu_nfrags*sizeof(*_enc->mcu_skip_ssd));
+  /*One token list and one extra-bits list per plane and zig-zag index.*/
+  for(pli=0;pli<3;pli++){
+    _enc->dct_tokens[pli]=(unsigned char **)oc_malloc_2d(64,
+     _enc->state.fplanes[pli].nfrags,sizeof(**_enc->dct_tokens));
+    _enc->extra_bits[pli]=(ogg_uint16_t **)oc_malloc_2d(64,
+     _enc->state.fplanes[pli].nfrags,sizeof(**_enc->extra_bits));
+  }
+#if defined(OC_COLLECT_METRICS)
+  _enc->frag_satd=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_satd));
+  _enc->frag_ssd=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_ssd));
+#endif
+#if defined(OC_X86_ASM)
+  oc_enc_vtable_init_x86(_enc);
+#else
+  oc_enc_vtable_init_c(_enc);
+#endif
+  _enc->keyframe_frequency_force=1<<_enc->state.info.keyframe_granule_shift;
+  _enc->state.qis[0]=_enc->state.info.quality;
+  _enc->state.nqis=1;
+  oc_rc_state_init(&_enc->rc,_enc);
+  oggpackB_writeinit(&_enc->opb);
+  /*Now check every allocation made above.
+    This is done only after the rate control state and the bit packer have
+     been initialized, so that oc_enc_clear() can tear everything down.*/
+  if(_enc->mb_info==NULL||_enc->frag_dc==NULL||_enc->coded_mbis==NULL||
+   _enc->mcu_skip_ssd==NULL||_enc->dct_tokens[0]==NULL||
+   _enc->dct_tokens[1]==NULL||_enc->dct_tokens[2]==NULL||
+   _enc->extra_bits[0]==NULL||_enc->extra_bits[1]==NULL||
+   _enc->extra_bits[2]==NULL
+#if defined(OC_COLLECT_METRICS)
+   ||_enc->frag_satd==NULL||_enc->frag_ssd==NULL
+#endif
+   ){
+    oc_enc_clear(_enc);
+    return TH_EFAULT;
+  }
+  oc_mode_scheme_chooser_init(&_enc->chooser);
+  oc_enc_mb_info_init(_enc);
+  memset(_enc->huff_idxs,0,sizeof(_enc->huff_idxs));
+  /*Reset the packet-out state machine.*/
+  _enc->packet_state=OC_PACKET_INFO_HDR;
+  _enc->dup_count=0;
+  _enc->nqueued_dups=0;
+  _enc->prev_dup_count=0;
+  /*Enable speed optimizations up through early skip by default.*/
+  _enc->sp_level=OC_SP_LEVEL_EARLY_SKIP;
+  /*Disable VP3 compatibility by default.*/
+  _enc->vp3_compatible=0;
+  /*No INTER frames coded yet.*/
+  _enc->coded_inter_frame=0;
+  /*Start out with the VP3.1 Huffman codes and the default quantization
+     parameters (selected by passing NULL).
+    The return value of oc_enc_set_quant_params() is not checked here.*/
+  memcpy(_enc->huff_codes,TH_VP31_HUFF_CODES,sizeof(_enc->huff_codes));
+  oc_enc_set_quant_params(_enc,NULL);
+  return 0;
+}
+
+/*Releases everything owned by an encoder context.
+  The context structure itself is not freed.*/
+static void oc_enc_clear(oc_enc_ctx *_enc){
+  int pli;
+  oc_rc_state_clear(&_enc->rc);
+#if defined(OC_COLLECT_METRICS)
+  oc_enc_mode_metrics_dump(_enc);
+#endif
+  oggpackB_writeclear(&_enc->opb);
+#if defined(OC_COLLECT_METRICS)
+  _ogg_free(_enc->frag_ssd);
+  _ogg_free(_enc->frag_satd);
+#endif
+  /*Release the per-plane token storage, last plane first.*/
+  for(pli=2;pli>=0;pli--){
+    oc_free_2d(_enc->extra_bits[pli]);
+    oc_free_2d(_enc->dct_tokens[pli]);
+  }
+  _ogg_free(_enc->mcu_skip_ssd);
+  _ogg_free(_enc->coded_mbis);
+  _ogg_free(_enc->frag_dc);
+  _ogg_free(_enc->mb_info);
+  /*The shared encoder/decoder state goes last.*/
+  oc_state_clear(&_enc->state);
+}
+
+/*Drops the current frame: the previous frame's reconstruction stands in for
+   this one, and the pending packet is emptied.*/
+static void oc_enc_drop_frame(th_enc_ctx *_enc){
+  int prev_idx;
+  /*Re-use the previous frame's reconstruction as our own.*/
+  prev_idx=_enc->state.ref_frame_idx[OC_FRAME_PREV];
+  _enc->state.ref_frame_idx[OC_FRAME_SELF]=prev_idx;
+  /*Let motion vector analysis know a frame was dropped.*/
+  _enc->prevframe_dropped=1;
+  /*Discard whatever was packed so far.*/
+  oggpackB_reset(&_enc->opb);
+}
+
+/*Encodes the current frame as a keyframe (INTRA frame) into _enc->opb.
+  _recode: Passed through to oc_enc_analyze_intra(); also, when it is zero and
+            this is frame number 0, the frame is encoded a second time (see
+            below).*/
+static void oc_enc_compress_keyframe(oc_enc_ctx *_enc,int _recode){
+  /*With a target bitrate set, rate control picks the qi; otherwise the
+     current value of qis[0] is left as it is.*/
+  if(_enc->state.info.target_bitrate>0){
+    _enc->state.qis[0]=oc_enc_select_qi(_enc,OC_INTRA_FRAME,
+     _enc->state.curframe_num>0);
+    _enc->state.nqis=1;
+  }
+  oc_enc_calc_lambda(_enc,OC_INTRA_FRAME);
+  oc_enc_analyze_intra(_enc,_recode);
+  oc_enc_frame_pack(_enc);
+  /*On the first frame, the previous call was an initial dry-run to prime
+     feed-forward statistics.*/
+  if(!_recode&&_enc->state.curframe_num==0){
+    if(_enc->state.info.target_bitrate>0){
+      /*Feed the size of the dry-run packet, in bits, to rate control.*/
+      oc_enc_update_rc_state(_enc,oggpackB_bytes(&_enc->opb)<<3,
+                             OC_INTRA_FRAME,_enc->state.qis[0],1,0);
+    }
+    /*Encode again for real; _recode=1 stops the recursion here.*/
+    oc_enc_compress_keyframe(_enc,1);
+  }
+}
+
+/*Encodes the current frame as a delta frame (INTER frame) into _enc->opb,
+   falling back to a keyframe if mode analysis asks for one.
+  _recode: Passed through to oc_enc_analyze_inter().*/
+static void oc_enc_compress_frame(oc_enc_ctx *_enc,int _recode){
+  /*With a target bitrate set, rate control picks the qi; otherwise the
+     current value of qis[0] is left as it is.*/
+  if(_enc->state.info.target_bitrate>0){
+    _enc->state.qis[0]=oc_enc_select_qi(_enc,OC_INTER_FRAME,1);
+    _enc->state.nqis=1;
+  }
+  oc_enc_calc_lambda(_enc,OC_INTER_FRAME);
+  if(oc_enc_analyze_inter(_enc,_enc->rc.twopass!=2,_recode)){
+    /*Mode analysis thinks this should have been a keyframe; start over.*/
+    oc_enc_compress_keyframe(_enc,1);
+  }
+  else{
+    oc_enc_frame_pack(_enc);
+    if(!_enc->coded_inter_frame){
+      /*On the first INTER frame, the previous call was an initial dry-run to
+         prime feed-forward statistics.*/
+      _enc->coded_inter_frame=1;
+      if(_enc->state.info.target_bitrate>0){
+        /*Rate control also needs to prime.*/
+        oc_enc_update_rc_state(_enc,oggpackB_bytes(&_enc->opb)<<3,
+         OC_INTER_FRAME,_enc->state.qis[0],1,0);
+      }
+      /*Encode again for real; coded_inter_frame is now set, so this recurses
+         at most once.*/
+      oc_enc_compress_frame(_enc,1);
+    }
+  }
+}
+
+/*Computes the granule position of the next packet to be output from the
+   current internal state and stores it in _enc->state.granpos.*/
+static void oc_enc_set_granpos(oc_enc_ctx *_enc){
+  unsigned ndups_emitted;
+  /*The number of duplicate frames already emitted for the current frame.*/
+  ndups_emitted=_enc->prev_dup_count-_enc->nqueued_dups;
+  if(_enc->state.frame_type!=OC_INTRA_FRAME){
+    /*The last keyframe goes in the high part, and the distance from it to the
+       current frame goes in the low part.*/
+    _enc->state.granpos=
+     ((_enc->state.keyframe_num+_enc->state.granpos_bias)<<
+     _enc->state.info.keyframe_granule_shift)
+     +_enc->state.curframe_num-_enc->state.keyframe_num+ndups_emitted;
+  }
+  else{
+    /*The current frame is itself a keyframe, so it goes in the high part.*/
+    _enc->state.granpos=
+     ((_enc->state.curframe_num+_enc->state.granpos_bias)<<
+     _enc->state.info.keyframe_granule_shift)+ndups_emitted;
+  }
+}
+
+
+/*Allocates and initializes an encoder context.
+  _info:  The stream parameters to encode with.
+  Return: The new context, or NULL if _info is NULL, the allocation fails, or
+           oc_enc_init() reports an error.*/
+th_enc_ctx *th_encode_alloc(const th_info *_info){
+  oc_enc_ctx *enc;
+  if(_info==NULL)return NULL;
+  enc=_ogg_malloc(sizeof(*enc));
+  if(enc!=NULL&&oc_enc_init(enc,_info)>=0)return enc;
+  /*Either the allocation or the initialization failed.*/
+  _ogg_free(enc);
+  return NULL;
+}
+
+/*Frees an encoder context and everything it owns.
+  Passing NULL is allowed and does nothing.*/
+void th_encode_free(th_enc_ctx *_enc){
+  if(_enc==NULL)return;
+  oc_enc_clear(_enc);
+  _ogg_free(_enc);
+}
+
+/*Encoder control function: queries or changes encoder settings.
+  _enc:    The encoder context.
+  _req:    The TH_ENCCTL_* request to perform.
+  _buf:    The request's parameter buffer; several requests also write a
+            result back into it.
+  _buf_sz: The size of _buf in bytes.
+  Return: 0 on success for most requests (the Huffman code, quantization
+           parameter and two-pass requests return whatever their helper
+           returns), TH_EFAULT when a required pointer is NULL, TH_EINVAL when
+           the buffer size or the requested value is not acceptable, or
+           TH_EIMPL for an unrecognized request.
+  Note that TH_ENCCTL_SET_QUALITY and TH_ENCCTL_SET_BITRATE do not check
+   _buf_sz.*/
+int th_encode_ctl(th_enc_ctx *_enc,int _req,void *_buf,size_t _buf_sz){
+  switch(_req){
+    case TH_ENCCTL_SET_HUFFMAN_CODES:{
+      /*A NULL buffer (with size 0) selects the default codes.*/
+      if(_buf==NULL&&_buf_sz!=0||
+       _buf!=NULL&&_buf_sz!=sizeof(th_huff_table)*TH_NHUFFMAN_TABLES){
+        return TH_EINVAL;
+      }
+      return oc_enc_set_huffman_codes(_enc,(const th_huff_table *)_buf);
+    }break;
+    case TH_ENCCTL_SET_QUANT_PARAMS:{
+      /*A NULL buffer (with size 0) selects the default parameters.*/
+      if(_buf==NULL&&_buf_sz!=0||
+       _buf!=NULL&&_buf_sz!=sizeof(th_quant_info)){
+        return TH_EINVAL;
+      }
+      return oc_enc_set_quant_params(_enc,(th_quant_info *)_buf);
+    }break;
+    case TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE:{
+      ogg_uint32_t keyframe_frequency_force;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_buf_sz!=sizeof(keyframe_frequency_force))return TH_EINVAL;
+      keyframe_frequency_force=*(ogg_uint32_t *)_buf;
+      if(keyframe_frequency_force<=0)keyframe_frequency_force=1;
+      if(_enc->packet_state==OC_PACKET_INFO_HDR){
+        /*It's still early enough to enlarge keyframe_granule_shift.*/
+        _enc->state.info.keyframe_granule_shift=OC_CLAMPI(
+         _enc->state.info.keyframe_granule_shift,
+         OC_ILOG_32(keyframe_frequency_force-1),31);
+      }
+      /*The interval can never exceed what keyframe_granule_shift allows.*/
+      _enc->keyframe_frequency_force=OC_MINI(keyframe_frequency_force,
+       (ogg_uint32_t)1U<<_enc->state.info.keyframe_granule_shift);
+      /*Report the value actually in effect back to the caller.*/
+      *(ogg_uint32_t *)_buf=_enc->keyframe_frequency_force;
+      return 0;
+    }break;
+    case TH_ENCCTL_SET_VP3_COMPATIBLE:{
+      int vp3_compatible;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_buf_sz!=sizeof(vp3_compatible))return TH_EINVAL;
+      vp3_compatible=*(int *)_buf;
+      _enc->vp3_compatible=vp3_compatible;
+      /*The VP3.1 tables are installed regardless of the requested value; if
+         either cannot be set, compatibility is reported as unavailable.*/
+      if(oc_enc_set_huffman_codes(_enc,TH_VP31_HUFF_CODES)<0)vp3_compatible=0;
+      if(oc_enc_set_quant_params(_enc,&TH_VP31_QUANT_INFO)<0)vp3_compatible=0;
+      if(_enc->state.info.pixel_fmt!=TH_PF_420||
+       _enc->state.info.pic_width<_enc->state.info.frame_width||
+       _enc->state.info.pic_height<_enc->state.info.frame_height||
+      /*If we have more than 4095 super blocks, VP3's RLE coding might
+         overflow.
+        We could overcome this by ensuring we flip the coded/not-coded flags on
+         at least one super block in the frame, but we pick the simple solution
+         of just telling the user the stream will be incompatible instead.
+        It's unlikely the old VP3 codec would be able to decode streams at this
+         resolution in real time in the first place.*/
+       _enc->state.nsbs>4095){
+        vp3_compatible=0;
+      }
+      *(int *)_buf=vp3_compatible;
+      return 0;
+    }break;
+    case TH_ENCCTL_GET_SPLEVEL_MAX:{
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_buf_sz!=sizeof(int))return TH_EINVAL;
+      *(int *)_buf=OC_SP_LEVEL_MAX;
+      return 0;
+    }break;
+    case TH_ENCCTL_SET_SPLEVEL:{
+      int speed;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_buf_sz!=sizeof(speed))return TH_EINVAL;
+      speed=*(int *)_buf;
+      if(speed<0||speed>OC_SP_LEVEL_MAX)return TH_EINVAL;
+      _enc->sp_level=speed;
+      return 0;
+    }break;
+    case TH_ENCCTL_GET_SPLEVEL:{
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_buf_sz!=sizeof(int))return TH_EINVAL;
+      *(int *)_buf=_enc->sp_level;
+      return 0;
+    }
+    case TH_ENCCTL_SET_DUP_COUNT:{
+      int dup_count;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_buf_sz!=sizeof(dup_count))return TH_EINVAL;
+      dup_count=*(int *)_buf;
+      /*Only a positive count can reach the keyframe interval.
+        Comparing a negative int directly against the unsigned interval would
+         convert it to a huge unsigned value and reject it, so that the clamp
+         to zero below could never take effect.*/
+      if(dup_count>0&&
+       (ogg_uint32_t)dup_count>=_enc->keyframe_frequency_force){
+        return TH_EINVAL;
+      }
+      /*Negative counts mean "no duplicates".*/
+      _enc->dup_count=OC_MAXI(dup_count,0);
+      return 0;
+    }break;
+    case TH_ENCCTL_SET_QUALITY:{
+      int qi;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      /*The quality cannot be set directly while a target bitrate is set.*/
+      if(_enc->state.info.target_bitrate>0)return TH_EINVAL;
+      qi=*(int *)_buf;
+      if(qi<0||qi>63)return TH_EINVAL;
+      _enc->state.info.quality=qi;
+      _enc->state.qis[0]=(unsigned char)qi;
+      _enc->state.nqis=1;
+      return 0;
+    }break;
+    case TH_ENCCTL_SET_BITRATE:{
+      long bitrate;
+      int  reset;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      bitrate=*(long *)_buf;
+      if(bitrate<=0)return TH_EINVAL;
+      /*If no target bitrate was set before, the rate control state has to be
+         initialized from scratch; otherwise it is just resized.*/
+      reset=_enc->state.info.target_bitrate<=0;
+      _enc->state.info.target_bitrate=bitrate>INT_MAX?INT_MAX:bitrate;
+      if(reset)oc_rc_state_init(&_enc->rc,_enc);
+      else oc_enc_rc_resize(_enc);
+      return 0;
+    }break;
+    case TH_ENCCTL_SET_RATE_FLAGS:{
+      int set;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_buf_sz!=sizeof(set))return TH_EINVAL;
+      if(_enc->state.info.target_bitrate<=0)return TH_EINVAL;
+      set=*(int *)_buf;
+      _enc->rc.drop_frames=set&TH_RATECTL_DROP_FRAMES;
+      _enc->rc.cap_overflow=set&TH_RATECTL_CAP_OVERFLOW;
+      _enc->rc.cap_underflow=set&TH_RATECTL_CAP_UNDERFLOW;
+      return 0;
+    }break;
+    case TH_ENCCTL_SET_RATE_BUFFER:{
+      int set;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_buf_sz!=sizeof(set))return TH_EINVAL;
+      if(_enc->state.info.target_bitrate<=0)return TH_EINVAL;
+      set=*(int *)_buf;
+      _enc->rc.buf_delay=set;
+      oc_enc_rc_resize(_enc);
+      /*Report the value in effect after resizing back to the caller.*/
+      *(int *)_buf=_enc->rc.buf_delay;
+      return 0;
+    }break;
+    case TH_ENCCTL_2PASS_OUT:{
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      /*Once curframe_num is non-negative, this is only allowed if the
+         encoder is already in pass 1 mode.*/
+      if(_enc->state.info.target_bitrate<=0||
+       _enc->state.curframe_num>=0&&_enc->rc.twopass!=1||
+       _buf_sz!=sizeof(unsigned char *)){
+        return TH_EINVAL;
+      }
+      return oc_enc_rc_2pass_out(_enc,(unsigned char **)_buf);
+    }break;
+    case TH_ENCCTL_2PASS_IN:{
+      if(_enc==NULL)return TH_EFAULT;
+      /*Once curframe_num is non-negative, this is only allowed if the
+         encoder is already in pass 2 mode.*/
+      if(_enc->state.info.target_bitrate<=0||
+       _enc->state.curframe_num>=0&&_enc->rc.twopass!=2){
+        return TH_EINVAL;
+      }
+      return oc_enc_rc_2pass_in(_enc,_buf,_buf_sz);
+    }break;
+    default:return TH_EIMPL;
+  }
+}
+
+/*Produces the next header packet by delegating to oc_state_flushheader().
+  _tc:    The comment data passed through to the header writer.
+  _op:    The packet structure passed through to the header writer.
+  Return: TH_EFAULT if _enc is NULL; otherwise whatever
+           oc_state_flushheader() returns.*/
+int th_encode_flushheader(th_enc_ctx *_enc,th_comment *_tc,ogg_packet *_op){
+  int ret;
+  if(_enc==NULL)ret=TH_EFAULT;
+  else{
+    ret=oc_state_flushheader(&_enc->state,&_enc->packet_state,&_enc->opb,
+     &_enc->qinfo,(const th_huff_table *)_enc->huff_codes,th_version_string(),
+     _tc,_op);
+  }
+  return ret;
+}
+
+/*Copies the picture region of one image plane into a destination plane and
+   fills the rest of the destination with smoothed padding.
+  _dst:        The destination plane; its width and height define the full
+                frame size.
+  _src:        The source plane; it is not read when the picture region is
+                empty.
+  _pic_x:      The X offset of the picture region.
+  _pic_y:      The Y offset of the picture region.
+  _pic_width:  The width of the picture region.
+  _pic_height: The height of the picture region.
+  The same offsets are applied to both planes.*/
+static void oc_img_plane_copy_pad(th_img_plane *_dst,th_img_plane *_src,
+ ogg_int32_t _pic_x,ogg_int32_t _pic_y,
+ ogg_int32_t _pic_width,ogg_int32_t _pic_height){
+  unsigned char *dst;
+  int            dstride;
+  ogg_uint32_t   frame_width;
+  ogg_uint32_t   frame_height;
+  ogg_uint32_t   y;
+  frame_width=_dst->width;
+  frame_height=_dst->height;
+  /*If we have _no_ data, just encode a dull green.*/
+  if(_pic_width==0||_pic_height==0){
+    dst=_dst->data;
+    dstride=_dst->stride;
+    /*Every sample of the plane is set to zero.*/
+    for(y=0;y<frame_height;y++){
+      memset(dst,0,frame_width*sizeof(*dst));
+      dst+=dstride;
+    }
+  }
+  /*Otherwise, copy what we do have, and add our own padding.*/
+  else{
+    unsigned char *dst_data;
+    unsigned char *src_data;
+    unsigned char *src;
+    int            sstride;
+    ogg_uint32_t   x;
+    /*Step 1: Copy the data we do have.*/
+    dstride=_dst->stride;
+    sstride=_src->stride;
+    dst_data=_dst->data;
+    src_data=_src->data;
+    dst=dst_data+_pic_y*(ptrdiff_t)dstride+_pic_x;
+    src=src_data+_pic_y*(ptrdiff_t)sstride+_pic_x;
+    for(y=0;y<_pic_height;y++){
+      memcpy(dst,src,_pic_width);
+      dst+=dstride;
+      src+=sstride;
+    }
+    /*Step 2: Perform a low-pass extension into the padding region.*/
+    /*Each padding sample is a rounded (1,2,1)/4 average of the three nearest
+       samples in the adjacent, already filled column or row.
+      The stride&-(cond) and x-(cond) forms reduce the offset to zero at the
+       edges, so the edge sample is used twice there instead of reading out
+       of bounds.*/
+    /*Left side.*/
+    /*Columns are filled from the picture edge outwards, each from the column
+       to its right.*/
+    for(x=_pic_x;x-->0;){
+      dst=dst_data+_pic_y*(ptrdiff_t)dstride+x;
+      for(y=0;y<_pic_height;y++){
+        dst[0]=(dst[1]<<1)+(dst-(dstride&-(y>0)))[1]
+         +(dst+(dstride&-(y+1<_pic_height)))[1]+2>>2;
+        dst+=dstride;
+      }
+    }
+    /*Right side.*/
+    /*dst points one sample to the left of the column being filled.*/
+    for(x=_pic_x+_pic_width;x<frame_width;x++){
+      dst=dst_data+_pic_y*(ptrdiff_t)dstride+x-1;
+      for(y=0;y<_pic_height;y++){
+        dst[1]=(dst[0]<<1)+(dst-(dstride&-(y>0)))[0]
+         +(dst+(dstride&-(y+1<_pic_height)))[0]+2>>2;
+        dst+=dstride;
+      }
+    }
+    /*Top.*/
+    /*Rows span the full frame width, so they also cover the side padding
+       filled in above.*/
+    dst=dst_data+_pic_y*(ptrdiff_t)dstride;
+    for(y=_pic_y;y-->0;){
+      for(x=0;x<frame_width;x++){
+        (dst-dstride)[x]=(dst[x]<<1)+dst[x-(x>0)]
+         +dst[x+(x+1<frame_width)]+2>>2;
+      }
+      dst-=dstride;
+    }
+    /*Bottom.*/
+    dst=dst_data+(_pic_y+_pic_height)*(ptrdiff_t)dstride;
+    for(y=_pic_y+_pic_height;y<frame_height;y++){
+      for(x=0;x<frame_width;x++){
+        dst[x]=((dst-dstride)[x]<<1)+(dst-dstride)[x-(x>0)]
+         +(dst-dstride)[x+(x+1<frame_width)]+2>>2;
+      }
+      dst+=dstride;
+    }
+  }
+}
+
+/*Submits one uncompressed frame to the encoder and compresses it.
+  _img:   The three planes of the frame to encode.
+          Plane 0 must match the frame size in the encoder's th_info, and
+           planes 1 and 2 must match that size reduced by the chroma
+           decimation of the pixel format.
+  Return: 0 on success.
+          TH_EFAULT if _enc or _img is NULL.
+          TH_EINVAL if the encoder is in the OC_PACKET_DONE state, if two-pass
+           mode is active with an empty two-pass buffer, or if a plane has the
+           wrong dimensions.*/
+int th_encode_ycbcr_in(th_enc_ctx *_enc,th_ycbcr_buffer _img){
+  th_ycbcr_buffer img;
+  int             cframe_width;
+  int             cframe_height;
+  int             cpic_width;
+  int             cpic_height;
+  int             cpic_x;
+  int             cpic_y;
+  int             hdec;
+  int             vdec;
+  int             pli;
+  int             refi;
+  int             drop;
+  /*Step 1: validate parameters.*/
+  if(_enc==NULL||_img==NULL)return TH_EFAULT;
+  if(_enc->packet_state==OC_PACKET_DONE)return TH_EINVAL;
+  if(_enc->rc.twopass&&_enc->rc.twopass_buffer_bytes==0)return TH_EINVAL;
+  if((ogg_uint32_t)_img[0].width!=_enc->state.info.frame_width||
+   (ogg_uint32_t)_img[0].height!=_enc->state.info.frame_height){
+    return TH_EINVAL;
+  }
+  /*hdec is 1 when bit 0 of pixel_fmt is clear, and vdec is 1 when bit 1 is
+     clear; they are the shifts applied to get the chroma plane size.*/
+  hdec=!(_enc->state.info.pixel_fmt&1);
+  vdec=!(_enc->state.info.pixel_fmt&2);
+  cframe_width=_enc->state.info.frame_width>>hdec;
+  cframe_height=_enc->state.info.frame_height>>vdec;
+  if(_img[1].width!=cframe_width||_img[2].width!=cframe_width||
+   _img[1].height!=cframe_height||_img[2].height!=cframe_height){
+    return TH_EINVAL;
+  }
+  /*Step 2: Copy the input to our internal buffer.
+    This lets us add padding, if necessary, so we don't have to worry about
+     dereferencing possibly invalid addresses, and allows us to use the same
+     strides and fragment offsets for both the input frame and the reference
+     frames.*/
+  /*Flip the input buffer upside down.*/
+  oc_ycbcr_buffer_flip(img,_img);
+  oc_img_plane_copy_pad(_enc->state.ref_frame_bufs[OC_FRAME_IO]+0,img+0,
+   _enc->state.info.pic_x,_enc->state.info.pic_y,
+   _enc->state.info.pic_width,_enc->state.info.pic_height);
+  /*Compute the picture region of the chroma planes: the offset is rounded
+     down and the far edge is rounded up.*/
+  cpic_x=_enc->state.info.pic_x>>hdec;
+  cpic_y=_enc->state.info.pic_y>>vdec;
+  cpic_width=(_enc->state.info.pic_x+_enc->state.info.pic_width+hdec>>hdec)
+   -cpic_x;
+  cpic_height=(_enc->state.info.pic_y+_enc->state.info.pic_height+vdec>>vdec)
+   -cpic_y;
+  for(pli=1;pli<3;pli++){
+    oc_img_plane_copy_pad(_enc->state.ref_frame_bufs[OC_FRAME_IO]+pli,img+pli,
+     cpic_x,cpic_y,cpic_width,cpic_height);
+  }
+  /*Step 3: Update the buffer state.*/
+  /*If a frame was encoded before this one, its reconstruction becomes the
+     previous reference.*/
+  if(_enc->state.ref_frame_idx[OC_FRAME_SELF]>=0){
+    _enc->state.ref_frame_idx[OC_FRAME_PREV]=
+     _enc->state.ref_frame_idx[OC_FRAME_SELF];
+    if(_enc->state.frame_type==OC_INTRA_FRAME){
+      /*The new frame becomes both the previous and gold reference frames.*/
+      _enc->state.keyframe_num=_enc->state.curframe_num;
+      _enc->state.ref_frame_idx[OC_FRAME_GOLD]=
+       _enc->state.ref_frame_idx[OC_FRAME_SELF];
+    }
+  }
+  /*Select a free buffer to use for the reconstructed version of this frame.*/
+  for(refi=0;refi==_enc->state.ref_frame_idx[OC_FRAME_GOLD]||
+   refi==_enc->state.ref_frame_idx[OC_FRAME_PREV];refi++);
+  _enc->state.ref_frame_idx[OC_FRAME_SELF]=refi;
+  /*Duplicates queued with the previous frame also advance the frame
+     number.*/
+  _enc->state.curframe_num+=_enc->prev_dup_count+1;
+  /*Step 4: Compress the frame.*/
+  /*Start with a keyframe, and don't allow the generation of invalid files that
+     overflow the keyframe_granule_shift.*/
+  if(_enc->rc.twopass_force_kf||_enc->state.curframe_num==0||
+   _enc->state.curframe_num-_enc->state.keyframe_num+_enc->dup_count>=
+   _enc->keyframe_frequency_force){
+    oc_enc_compress_keyframe(_enc,0);
+    drop=0;
+  }
+  else{
+    oc_enc_compress_frame(_enc,0);
+    drop=1;
+  }
+  oc_restore_fpu(&_enc->state);
+  /*drop currently indicates if the frame is droppable.*/
+  if(_enc->state.info.target_bitrate>0){
+    drop=oc_enc_update_rc_state(_enc,oggpackB_bytes(&_enc->opb)<<3,
+     _enc->state.frame_type,_enc->state.qis[0],0,drop);
+  }
+  else drop=0;
+  /*drop now indicates if the frame was dropped.*/
+  if(drop)oc_enc_drop_frame(_enc);
+  else _enc->prevframe_dropped=0;
+  _enc->packet_state=OC_PACKET_READY;
+  /*Hand the requested duplicate count over to the packet-out queue, and reset
+     the request for the next frame.*/
+  _enc->prev_dup_count=_enc->nqueued_dups=_enc->dup_count;
+  _enc->dup_count=0;
+#if defined(OC_DUMP_IMAGES)
+  oc_enc_set_granpos(_enc);
+  oc_state_dump_frame(&_enc->state,OC_FRAME_IO,"src");
+  oc_state_dump_frame(&_enc->state,OC_FRAME_SELF,"rec");
+#endif
+  return 0;
+}
+
+/*Retrieves the next encoded packet, if there is one.
+  After the data packet for a frame has been returned, one empty (0 byte)
+   packet is returned for each duplicate frame still queued.
+  _last_p: Non-zero if no more frames will be submitted.
+           The packet is only marked end-of-stream once no duplicates remain
+            queued.
+  _op:     Receives the packet.
+           The packet data points into the encoder's own buffer.
+  Return: A positive value (1 plus the number of duplicates still queued) if
+           a packet was produced.
+          0 if no packet is available.
+          TH_EFAULT if _enc or _op is NULL, or if the packet buffer could not
+           be retrieved.*/
+int th_encode_packetout(th_enc_ctx *_enc,int _last_p,ogg_packet *_op){
+  if(_enc==NULL||_op==NULL)return TH_EFAULT;
+  if(_enc->packet_state==OC_PACKET_READY){
+    /*A freshly encoded frame is waiting.*/
+    _enc->packet_state=OC_PACKET_EMPTY;
+    if(_enc->rc.twopass!=1){
+      unsigned char *packet;
+      packet=oggpackB_get_buffer(&_enc->opb);
+      /*If there's no packet, malloc failed while writing; it's lost forever.*/
+      if(packet==NULL)return TH_EFAULT;
+      _op->packet=packet;
+      _op->bytes=oggpackB_bytes(&_enc->opb);
+    }
+    /*For the first pass in 2-pass mode, don't emit any packet data.*/
+    else{
+      _op->packet=NULL;
+      _op->bytes=0;
+    }
+  }
+  else if(_enc->packet_state==OC_PACKET_EMPTY){
+    /*The frame's own packet is already out; emit any queued duplicates as
+       empty packets.*/
+    if(_enc->nqueued_dups>0){
+      _enc->nqueued_dups--;
+      _op->packet=NULL;
+      _op->bytes=0;
+    }
+    else{
+      /*Nothing left to return.*/
+      if(_last_p)_enc->packet_state=OC_PACKET_DONE;
+      return 0;
+    }
+  }
+  /*Any other state has no frame packet to return.*/
+  else return 0;
+  /*This is only the last packet if no duplicates are still queued.*/
+  _last_p=_last_p&&_enc->nqueued_dups<=0;
+  _op->b_o_s=0;
+  _op->e_o_s=_last_p;
+  oc_enc_set_granpos(_enc);
+  _op->packetno=th_granule_frame(_enc,_enc->state.granpos)+3;
+  _op->granulepos=_enc->state.granpos;
+  if(_last_p)_enc->packet_state=OC_PACKET_DONE;
+  return 1+_enc->nqueued_dups;
+}

+ 274 - 0
Engine/lib/libtheora/lib/enquant.c

@@ -0,0 +1,274 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: enquant.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+#include <stdlib.h>
+#include <string.h>
+#include "encint.h"
+
+
+
+/*Writes the quantization parameters in _qinfo to the bit-packer _opb.
+  The fields are written in this order: the loop filter limits, the AC scale
+   table, the DC scale table, the list of unique base matrices, and finally
+   the quant ranges (with base matrix indices) for each of the 6 combinations
+   of quantizer type and color plane.
+  _opb:   The bit-packer to write to.
+  _qinfo: The quantization parameters to pack.*/
+void oc_quant_params_pack(oggpack_buffer *_opb,const th_quant_info *_qinfo){
+  const th_quant_ranges *qranges;
+  const th_quant_base   *base_mats[2*3*64];
+  int                    indices[2][3][64];
+  int                    nbase_mats;
+  int                    nbits;
+  int                    ci;
+  int                    qi;
+  int                    qri;
+  int                    qti;
+  int                    pli;
+  int                    qtj;
+  int                    plj;
+  int                    bmi;
+  int                    i;
+  /*Loop filter limits: a 3-bit field width (sized from the largest limit),
+     followed by the 64 limits themselves.*/
+  i=_qinfo->loop_filter_limits[0];
+  for(qi=1;qi<64;qi++)i=OC_MAXI(i,_qinfo->loop_filter_limits[qi]);
+  nbits=OC_ILOG_32(i);
+  oggpackB_write(_opb,nbits,3);
+  for(qi=0;qi<64;qi++){
+    oggpackB_write(_opb,_qinfo->loop_filter_limits[qi],nbits);
+  }
+  /*580 bits for VP3.*/
+  /*AC scale table: the field width minus one is stored in 4 bits, followed by
+     the 64 scale values.
+    i starts at 1, so the argument of OC_ILOGNZ_32() is never zero.*/
+  i=1;
+  for(qi=0;qi<64;qi++)i=OC_MAXI(_qinfo->ac_scale[qi],i);
+  nbits=OC_ILOGNZ_32(i);
+  oggpackB_write(_opb,nbits-1,4);
+  for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->ac_scale[qi],nbits);
+  /*516 bits for VP3.*/
+  /*DC scale table: same layout as the AC scale table.*/
+  i=1;
+  for(qi=0;qi<64;qi++)i=OC_MAXI(_qinfo->dc_scale[qi],i);
+  nbits=OC_ILOGNZ_32(i);
+  oggpackB_write(_opb,nbits-1,4);
+  for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->dc_scale[qi],nbits);
+  /*Consolidate any duplicate base matrices.*/
+  nbase_mats=0;
+  for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
+    qranges=_qinfo->qi_ranges[qti]+pli;
+    /*Note the inclusive bound: nranges+1 base matrices are examined for each
+       set of ranges.*/
+    for(qri=0;qri<=qranges->nranges;qri++){
+      /*Linear search of the matrices collected so far; a matrix that is not
+         found is appended to the list.
+        indices[][][] records where each matrix ended up.*/
+      for(bmi=0;;bmi++){
+        if(bmi>=nbase_mats){
+          base_mats[bmi]=qranges->base_matrices+qri;
+          indices[qti][pli][qri]=nbase_mats++;
+          break;
+        }
+        else if(memcmp(base_mats[bmi][0],qranges->base_matrices[qri],
+         sizeof(base_mats[bmi][0]))==0){
+          indices[qti][pli][qri]=bmi;
+          break;
+        }
+      }
+    }
+  }
+  /*Write out the list of unique base matrices.
+    1545 bits for VP3 matrices.*/
+  oggpackB_write(_opb,nbase_mats-1,9);
+  for(bmi=0;bmi<nbase_mats;bmi++){
+    for(ci=0;ci<64;ci++)oggpackB_write(_opb,base_mats[bmi][0][ci],8);
+  }
+  /*Now store quant ranges and their associated indices into the base matrix
+     list.
+    46 bits for VP3 matrices.*/
+  nbits=OC_ILOG_32(nbase_mats-1);
+  for(i=0;i<6;i++){
+    qti=i/3;
+    pli=i%3;
+    qranges=_qinfo->qi_ranges[qti]+pli;
+    /*Every set but the first starts with a short code saying whether it
+       simply repeats an earlier set (same range count, sizes, and base matrix
+       indices).*/
+    if(i>0){
+      if(qti>0){
+        /*Same as the set for this plane in the previous quantizer type:
+           write the 2-bit value 1.*/
+        if(qranges->nranges==_qinfo->qi_ranges[qti-1][pli].nranges&&
+         memcmp(qranges->sizes,_qinfo->qi_ranges[qti-1][pli].sizes,
+         qranges->nranges*sizeof(qranges->sizes[0]))==0&&
+         memcmp(indices[qti][pli],indices[qti-1][pli],
+         (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){
+          oggpackB_write(_opb,1,2);
+          continue;
+        }
+      }
+      /*Same as the immediately preceding set (index i-1): write a zero,
+         which is 1 bit wide for qti==0 and 2 bits wide otherwise.*/
+      qtj=(i-1)/3;
+      plj=(i-1)%3;
+      if(qranges->nranges==_qinfo->qi_ranges[qtj][plj].nranges&&
+       memcmp(qranges->sizes,_qinfo->qi_ranges[qtj][plj].sizes,
+       qranges->nranges*sizeof(qranges->sizes[0]))==0&&
+       memcmp(indices[qti][pli],indices[qtj][plj],
+       (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){
+        oggpackB_write(_opb,0,1+(qti>0));
+        continue;
+      }
+      /*Otherwise a single 1 bit announces an explicitly coded set.*/
+      oggpackB_write(_opb,1,1);
+    }
+    /*Explicit set: the first base matrix index, then alternating range sizes
+       and base matrix indices until the sizes add up to 63.
+      Each size (minus one) is written with a width derived from 62-qi, where
+       qi is the sum of the sizes written so far.*/
+    oggpackB_write(_opb,indices[qti][pli][0],nbits);
+    for(qi=qri=0;qi<63;qri++){
+      oggpackB_write(_opb,qranges->sizes[qri]-1,OC_ILOG_32(62-qi));
+      qi+=qranges->sizes[qri];
+      oggpackB_write(_opb,indices[qti][pli][qri+1],nbits);
+    }
+  }
+}
+
+/*Precomputes the multiplier and shift used to divide by a quantizer value
+   with multiplies and shifts instead of a division.
+  _this: The structure that receives the multiplier (m) and the shift (l).
+  _d:    The divisor; it is doubled (within 16 bits) before use.*/
+static void oc_iquant_init(oc_iquant *_this,ogg_uint16_t _d){
+  ogg_uint32_t recip;
+  int          shift;
+  /*The doubled divisor is stored back into the 16-bit parameter, so anything
+     shifted past bit 15 is discarded.*/
+  _d<<=1;
+  shift=OC_ILOGNZ_32(_d)-1;
+  recip=((ogg_uint32_t)1<<(16+shift))/_d+1;
+  _this->l=shift;
+  /*0x10000 is subtracted before narrowing the multiplier to 16 bits.*/
+  _this->m=(ogg_int16_t)(recip-0x10000);
+}
+
+/*Initializes the dequantization tables and derives the matching quantization
+   tables from them.
+  See comments at oc_dequant_tables_init() for how the quantization tables'
+   storage should be initialized.
+  _dequant: Receives the dequantization tables, indexed by [qi][pli][qti].
+  _enquant: Receives the quantization tables, with the same indexing.
+  _qinfo:   The quantization parameters to build the tables from.*/
+void oc_enquant_tables_init(ogg_uint16_t *_dequant[64][3][2],
+ oc_iquant *_enquant[64][3][2],const th_quant_info *_qinfo){
+  int qi;
+  /*The dequantization tables have to exist before anything can be derived
+     from them.*/
+  oc_dequant_tables_init(_dequant,NULL,_qinfo);
+  for(qi=0;qi<64;qi++){
+    int qti;
+    for(qti=0;qti<2;qti++){
+      int pli;
+      for(pli=0;pli<3;pli++){
+        int src_qti;
+        int src_pli;
+        int qtj;
+        int zzi;
+        /*Search the (type, plane) pairs already handled for this qi for one
+           that shares this dequantization table pointer.*/
+        src_qti=src_pli=-1;
+        for(qtj=0;qtj<=qti&&src_qti<0;qtj++){
+          int nplanes;
+          int plj;
+          /*Earlier quantizer types cover all 3 planes; the current type only
+             covers the planes before pli.*/
+          nplanes=qtj<qti?3:pli;
+          for(plj=0;plj<nplanes;plj++){
+            if(_dequant[qi][pli][qti]==_dequant[qi][plj][qtj]){
+              src_qti=qtj;
+              src_pli=plj;
+              break;
+            }
+          }
+        }
+        /*A shared dequantization table gets a shared quantization table.*/
+        if(src_qti>=0){
+          _enquant[qi][pli][qti]=_enquant[qi][src_pli][src_qti];
+          continue;
+        }
+        /*The original VP3.2 code used a "sharpness" parameter to control the
+           rounding offset and the size of the dead zone around 0.
+          Tokens are now R-D optimized for each block after quantization, so
+           the rounding offset should always be 1/2 and no explicit dead zone
+           is needed.
+          That VP3.2 code is therefore gone, and what remained of the floating
+           point code is implemented as equivalent integer code with exact
+           precision.*/
+        for(zzi=0;zzi<64;zzi++){
+          oc_iquant_init(&_enquant[qi][pli][qti][zzi],
+           _dequant[qi][pli][qti][zzi]);
+        }
+      }
+    }
+  }
+}
+
+
+
+/*This table gives the square root of the fraction of the squared magnitude of
+   each DCT coefficient relative to the total, scaled by 2**16, for both INTRA
+   and INTER modes.
+  These values were measured after motion-compensated prediction, before
+   quantization, over a large set of test video (from QCIF to 1080p) encoded at
+   all possible rates.
+  The DC coefficient takes into account the DPCM prediction (using the
+   quantized values from neighboring blocks, as the encoder does, but still
+   before quantization of the coefficient in the current block).
+  The results differ significantly from the expected variance (e.g., using an
+   AR(1) model of the signal with rho=0.95, as is frequently done to compute
+   the coding gain of the DCT).
+  We use them to estimate an "average" quantizer for a given quantizer matrix,
+   as this is used to parameterize a number of the rate control decisions.
+  These values are themselves probably quantizer-matrix dependent, since the
+   shape of the matrix affects the noise distribution in the reference frames,
+   but they should at least give us _some_ amount of adaptivity to different
+   matrices, as opposed to hard-coding a table of average Q values for the
+   current set.
+  The main features they capture are that a) only a few of the quantizers in
+   the upper-left corner contribute anything significant at all (though INTER
+   mode is significantly flatter) and b) the DPCM prediction of the DC
+   coefficient gives a very minor improvement in the INTRA case and a quite
+   significant one in the INTER case (over the expected variance).*/
+/*Indexed as OC_RPSD[qti][ci] by oc_enquant_qavg_init().
+  Each sub-table is listed as an 8x8 grid and appears to be symmetric.*/
+static const ogg_uint16_t OC_RPSD[2][64]={
+  /*qti==0: presumably INTRA, as this is the steeper of the two tables --
+     TODO confirm.*/
+  {
+    52725,17370,10399, 6867, 5115, 3798, 2942, 2076,
+    17370, 9900, 6948, 4994, 3836, 2869, 2229, 1619,
+    10399, 6948, 5516, 4202, 3376, 2573, 2015, 1461,
+     6867, 4994, 4202, 3377, 2800, 2164, 1718, 1243,
+     5115, 3836, 3376, 2800, 2391, 1884, 1530, 1091,
+     3798, 2869, 2573, 2164, 1884, 1495, 1212,  873,
+     2942, 2229, 2015, 1718, 1530, 1212, 1001,  704,
+     2076, 1619, 1461, 1243, 1091,  873,  704,  474
+  },
+  /*qti==1: presumably INTER, as this is the flatter of the two tables --
+     TODO confirm.*/
+  {
+    23411,15604,13529,11601,10683, 8958, 7840, 6142,
+    15604,11901,10718, 9108, 8290, 6961, 6023, 4487,
+    13529,10718, 9961, 8527, 7945, 6689, 5742, 4333,
+    11601, 9108, 8527, 7414, 7084, 5923, 5175, 3743,
+    10683, 8290, 7945, 7084, 6771, 5754, 4793, 3504,
+     8958, 6961, 6689, 5923, 5754, 4679, 3936, 2989,
+     7840, 6023, 5742, 5175, 4793, 3936, 3522, 2558,
+     6142, 4487, 4333, 3743, 3504, 2989, 2558, 1829
+  }
+};
+
+/*The fraction of the squared magnitude of the residuals in each color channel
+   relative to the total, scaled by 2**16, for each pixel format.
+  These values were measured after motion-compensated prediction, before
+   quantization, over a large set of test video encoded at all possible rates.
+  TODO: These values are only from INTER frames; it should be re-measured for
+   INTRA frames.*/
+/*Indexed as OC_PCD[_pixel_fmt][pli] by oc_enquant_qavg_init(): one row per
+   pixel format value, one column per color plane.
+  Each row sums to 65536 (2**16).
+  NOTE(review): rows 1 and 2 are identical; presumably one of them fills a
+   reserved pixel format slot -- confirm against the pixel format enum.*/
+static const ogg_uint16_t OC_PCD[4][3]={
+  {59926, 3038, 2572},
+  {55201, 5597, 4738},
+  {55201, 5597, 4738},
+  {47682, 9669, 8185}
+};
+
+
+/*Compute an "average" quantizer for each qi level.
+  We do one for INTER and one for INTRA, since their behavior is very
+   different, but average across chroma channels.
+  The basic approach is to compute a harmonic average of the squared quantizer,
+   weighted by the expected squared magnitude of the DCT coefficients.
+  Under the (not quite true) assumption that DCT coefficients are
+   Laplacian-distributed, this preserves the product Q*lambda, where
+   lambda=sqrt(2/sigma**2) is the Laplacian distribution parameter (not to be
+   confused with the lambda used in R-D optimization throughout most of the
+   rest of the code).
+  The value Q*lambda completely determines the entropy of the coefficients.*/
+/*Fills in the log of the "average" quantizer for every quantizer type and qi
+   value.
+  _log_qavg:  Receives the results, indexed by [qti][qi].
+  _dequant:   The dequantization tables, indexed by [qi][pli][qti].
+  _pixel_fmt: The pixel format, used to select the per-plane weights.*/
+void oc_enquant_qavg_init(ogg_int64_t _log_qavg[2][64],
+ ogg_uint16_t *_dequant[64][3][2],int _pixel_fmt){
+  int qti;
+  for(qti=0;qti<2;qti++){
+    int qi;
+    for(qi=0;qi<64;qi++){
+      ogg_int64_t wsum;
+      int         pli;
+      wsum=0;
+      for(pli=0;pli<3;pli++){
+        const ogg_uint16_t *dequant;
+        ogg_uint32_t        plane_sum;
+        int                 ci;
+        dequant=_dequant[qi][pli][qti];
+        plane_sum=0;
+        for(ci=0;ci<64;ci++){
+          unsigned q;
+          unsigned ratio;
+          q=dequant[OC_IZIG_ZAG[ci]];
+          /*Divide the expected coefficient magnitude by the quantizer,
+             rounding to nearest.*/
+          ratio=(OC_RPSD[qti][ci]+(q>>1))/q;
+          plane_sum+=ratio*(ogg_uint32_t)ratio;
+        }
+        /*Weight each plane's sum by that plane's share for this format.*/
+        wsum+=OC_PCD[_pixel_fmt][pli]*(ogg_int64_t)plane_sum;
+      }
+      /*qavg=1.0/sqrt(wsum): in the log domain, negate and halve.*/
+      _log_qavg[qti][qi]=(OC_Q57(48)-oc_blog64(wsum))>>1;
+    }
+  }
+}

+ 27 - 0
Engine/lib/libtheora/lib/enquant.h

@@ -0,0 +1,27 @@
+#if !defined(_enquant_H)
+# define _enquant_H (1)
+# include "quant.h"
+
+typedef struct oc_iquant oc_iquant;
+
+#define OC_QUANT_MAX_LOG (OC_Q57(OC_STATIC_ILOG_32(OC_QUANT_MAX)-1))
+
+/*Used to compute x/d via (((x*m>>16)+x>>l)+(x<0))
+   (i.e., one 16x16->16 mul, 2 shifts, and 2 adds).
+  This is not an approximation; for 16-bit x and d, it is exact.*/
+struct oc_iquant{
+  /*The multiplier, stored with 0x10000 subtracted so that it fits in 16 bits
+     (the "+x" term in the formula above adds it back).*/
+  ogg_int16_t m;
+  /*The final right shift.*/
+  ogg_int16_t l;
+};
+
+typedef oc_iquant        oc_iquant_table[64];
+
+
+
+void oc_quant_params_pack(oggpack_buffer *_opb,const th_quant_info *_qinfo);
+void oc_enquant_tables_init(ogg_uint16_t *_dequant[64][3][2],
+ oc_iquant *_enquant[64][3][2],const th_quant_info *_qinfo);
+void oc_enquant_qavg_init(ogg_int64_t _log_qavg[2][64],
+ ogg_uint16_t *_dequant[64][3][2],int _pixel_fmt);
+
+#endif

+ 422 - 0
Engine/lib/libtheora/lib/fdct.c

@@ -0,0 +1,422 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: fdct.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+#include "encint.h"
+#include "dct.h"
+
+
+
+/*Performs a forward 8 point Type-II DCT transform.
+  The output is scaled by a factor of 2 from the orthonormal version of the
+   transform.
+  _y: The buffer to store the result in.
+      Data will be placed in the first 8 entries (e.g., in a row of an 8x8
+       block).
+  _x: The input coefficients.
+      Every 8th entry is used (e.g., from a column of an 8x8 block).*/
+static void oc_fdct8(ogg_int16_t _y[8],const ogg_int16_t *_x){
+  int t0;
+  int t1;
+  int t2;
+  int t3;
+  int t4;
+  int t5;
+  int t6;
+  int t7;
+  int r;
+  int s;
+  int u;
+  int v;
+  /*The inputs are widened to int before each add/subtract; _x[k<<3] is the
+     k-th input sample.*/
+  /*Stage 1:*/
+  /*0-7 butterfly.*/
+  t0=_x[0<<3]+(int)_x[7<<3];
+  t7=_x[0<<3]-(int)_x[7<<3];
+  /*1-6 butterfly.*/
+  t1=_x[1<<3]+(int)_x[6<<3];
+  t6=_x[1<<3]-(int)_x[6<<3];
+  /*2-5 butterfly.*/
+  t2=_x[2<<3]+(int)_x[5<<3];
+  t5=_x[2<<3]-(int)_x[5<<3];
+  /*3-4 butterfly.*/
+  t3=_x[3<<3]+(int)_x[4<<3];
+  t4=_x[3<<3]-(int)_x[4<<3];
+  /*Stage 2:*/
+  /*0-3 butterfly.*/
+  r=t0+t3;
+  t3=t0-t3;
+  t0=r;
+  /*1-2 butterfly.*/
+  r=t1+t2;
+  t2=t1-t2;
+  t1=r;
+  /*6-5 butterfly.*/
+  r=t6+t5;
+  t5=t6-t5;
+  t6=r;
+  /*Stages 3 and 4 are where all the approximation occurs.
+    These are chosen to be as close to an exact inverse of the approximations
+     made in the iDCT as possible, while still using mostly 16-bit arithmetic.
+    We use some 16x16->32 signed MACs, but those still commonly execute in 1
+     cycle on a 16-bit DSP.
+    For example, s=(27146*t5+0x4000>>16)+t5+(t5!=0) is an exact inverse of
+     t5=(OC_C4S4*s>>16).
+    That is, applying the latter to the output of the former will recover t5
+     exactly (over the valid input range of t5, -23171...23169).
+    We increase the rounding bias to 0xB500 in this particular case so that
+     errors inverting the subsequent butterfly are not one-sided (e.g., the
+     mean error is very close to zero).
+    The (t5!=0) term could be replaced simply by 1, but we want to send 0 to 0.
+    The fDCT of an all-zeros block will still not be zero, because of the
+     biases we added at the very beginning of the process, but it will be close
+     enough that it is guaranteed to round to zero.*/
+  /*Note that throughout the code below, >> has lower precedence than +, so a
+     trailing shift applies to the whole sum that precedes it.*/
+  /*Stage 3:*/
+  /*4-5 butterfly.*/
+  s=(27146*t5+0xB500>>16)+t5+(t5!=0)>>1;
+  r=t4+s;
+  t5=t4-s;
+  t4=r;
+  /*7-6 butterfly.*/
+  s=(27146*t6+0xB500>>16)+t6+(t6!=0)>>1;
+  r=t7+s;
+  t6=t7-s;
+  t7=r;
+  /*Stage 4:*/
+  /*0-1 butterfly.*/
+  r=(27146*t0+0x4000>>16)+t0+(t0!=0);
+  s=(27146*t1+0xB500>>16)+t1+(t1!=0);
+  u=r+s>>1;
+  v=r-u;
+  _y[0]=u;
+  _y[4]=v;
+  /*3-2 rotation by 6pi/16*/
+  u=(OC_C6S2*t2+OC_C2S6*t3+0x6CB7>>16)+(t3!=0);
+  s=(OC_C6S2*u>>16)-t2;
+  v=(s*21600+0x2800>>18)+s+(s!=0);
+  _y[2]=u;
+  _y[6]=v;
+  /*6-5 rotation by 3pi/16*/
+  u=(OC_C5S3*t6+OC_C3S5*t5+0x0E3D>>16)+(t5!=0);
+  s=t6-(OC_C5S3*u>>16);
+  v=(s*26568+0x3400>>17)+s+(s!=0);
+  _y[5]=u;
+  _y[3]=v;
+  /*7-4 rotation by 7pi/16*/
+  u=(OC_C7S1*t4+OC_C1S7*t7+0x7B1B>>16)+(t7!=0);
+  s=(OC_C7S1*u>>16)-t4;
+  v=(s*20539+0x3000>>20)+s+(s!=0);
+  _y[1]=u;
+  _y[7]=v;
+}
+
+/*Runs the forward 8x8 DCT implementation selected in the encoder's
+   optimization vtable.
+  _enc: The encoder context that supplies the vtable.
+  _y:   The buffer to store the result in.
+  _x:   The input coefficients.*/
+void oc_enc_fdct8x8(const oc_enc_ctx *_enc,ogg_int16_t _y[64],
+ const ogg_int16_t _x[64]){
+  _enc->opt_vtable.fdct8x8(_y,_x);
+}
+
+/*Performs a forward 8x8 Type-II DCT transform.
+  The output is scaled by a factor of 4 relative to the orthonormal version
+   of the transform.
+  _y: The buffer to store the result in.
+      This may be the same as _x.
+  _x: The input coefficients. */
+void oc_enc_fdct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
+  const ogg_int16_t *in;
+  ogg_int16_t       *end;
+  ogg_int16_t       *out;
+  ogg_int16_t        w[64];
+  int                i;
+  /*Add two extra bits of working precision to improve accuracy; any more and
+     we could overflow.
+    This is written as a multiply rather than a shift because left-shifting a
+     negative value is undefined behavior in C.
+    All of _x is copied into w before _y is written, which is what allows _y
+     to be the same buffer as _x.*/
+  for(i=0;i<64;i++)w[i]=_x[i]*4;
+  /*These biases correct for some systematic error that remains in the full
+     fDCT->iDCT round trip.*/
+  w[0]+=(w[0]!=0)+1;
+  w[1]++;
+  w[8]--;
+  /*Transform columns of w into rows of _y.*/
+  for(in=w,out=_y,end=out+64;out<end;in++,out+=8)oc_fdct8(out,in);
+  /*Transform columns of _y into rows of w.*/
+  for(in=_y,out=w,end=out+64;out<end;in++,out+=8)oc_fdct8(out,in);
+  /*Round the result back to the external working precision (which is still
+     scaled by four relative to the orthogonal result).
+    For negative values this relies on >> being an arithmetic shift, which is
+     implementation-defined in C.
+    TODO: We should just update the external working precision.*/
+  for(i=0;i<64;i++)_y[i]=(w[i]+2)>>2;
+}
+
+
+
+/*This does not seem to outperform simple LFE border padding before MC.
+  It yields higher PSNR, but much higher bitrate usage.*/
+#if 0
+typedef struct oc_extension_info oc_extension_info;
+
+
+
+/*Information needed to pad boundary blocks.
+  We multiply each row/column by an extension matrix that fills in the padding
+   values as a linear combination of the active values, so that an equivalent
+   number of coefficients are forced to zero.
+  This costs at most 16 multiplies, the same as a 1-D fDCT itself, and as
+   little as 7 multiplies.
+  We compute the extension matrices for every possible shape in advance, as
+   there are only 35.
+  The coefficients for all matrices are stored in a single array to take
+   advantage of the overlap and repetitiveness of many of the shapes.
+  A similar technique is applied to the offsets into this array.
+  This reduces the required table storage by about 48%.
+  See tools/extgen.c for details.
+  We could conceivably do the same for all 256 possible shapes.*/
+struct oc_extension_info{
+  /*The mask of the active pixels in the shape.
+    This is the value compared against the row/column masks when looking up
+     a shape.*/
+  short                     mask;
+  /*The number of active pixels in the shape.*/
+  short                     na;
+  /*The extension matrix.
+    This is (8-na)xna: an array of 8-na row pointers, each pointing at na
+     coefficients (it is indexed as ext[padding index][active index]).*/
+  const ogg_int16_t *const *ext;
+  /*The pixel indices: na active pixels followed by 8-na padding pixels.*/
+  unsigned char             pi[8];
+  /*The coefficient indices: na unconstrained coefficients followed by 8-na
+     coefficients to be forced to zero.*/
+  unsigned char             ci[8];
+};
+
+
+/*The number of shapes we need.*/
+#define OC_NSHAPES   (35)
+
+static const ogg_int16_t OC_EXT_COEFFS[229]={
+  0x7FFF,0xE1F8,0x6903,0xAA79,0x5587,0x7FFF,0x1E08,0x7FFF,
+  0x5587,0xAA79,0x6903,0xE1F8,0x7FFF,0x0000,0x0000,0x0000,
+  0x7FFF,0x0000,0x0000,0x7FFF,0x8000,0x7FFF,0x0000,0x0000,
+  0x7FFF,0xE1F8,0x1E08,0xB0A7,0xAA1D,0x337C,0x7FFF,0x4345,
+  0x2267,0x4345,0x7FFF,0x337C,0xAA1D,0xB0A7,0x8A8C,0x4F59,
+  0x03B4,0xE2D6,0x7FFF,0x2CF3,0x7FFF,0xE2D6,0x03B4,0x4F59,
+  0x8A8C,0x1103,0x7AEF,0x5225,0xDF60,0xC288,0xDF60,0x5225,
+  0x7AEF,0x1103,0x668A,0xD6EE,0x3A16,0x0E6C,0xFA07,0x0E6C,
+  0x3A16,0xD6EE,0x668A,0x2A79,0x2402,0x980F,0x50F5,0x4882,
+  0x50F5,0x980F,0x2402,0x2A79,0xF976,0x2768,0x5F22,0x2768,
+  0xF976,0x1F91,0x76C1,0xE9AE,0x76C1,0x1F91,0x7FFF,0xD185,
+  0x0FC8,0xD185,0x7FFF,0x4F59,0x4345,0xED62,0x4345,0x4F59,
+  0xF574,0x5D99,0x2CF3,0x5D99,0xF574,0x5587,0x3505,0x30FC,
+  0xF482,0x953C,0xEAC4,0x7FFF,0x4F04,0x7FFF,0xEAC4,0x953C,
+  0xF482,0x30FC,0x4F04,0x273D,0xD8C3,0x273D,0x1E09,0x61F7,
+  0x1E09,0x273D,0xD8C3,0x273D,0x4F04,0x30FC,0xA57E,0x153C,
+  0x6AC4,0x3C7A,0x1E08,0x3C7A,0x6AC4,0x153C,0xA57E,0x7FFF,
+  0xA57E,0x5A82,0x6AC4,0x153C,0xC386,0xE1F8,0xC386,0x153C,
+  0x6AC4,0x5A82,0xD8C3,0x273D,0x7FFF,0xE1F7,0x7FFF,0x273D,
+  0xD8C3,0x4F04,0x30FC,0xD8C3,0x273D,0xD8C3,0x30FC,0x4F04,
+  0x1FC8,0x67AD,0x1853,0xE038,0x1853,0x67AD,0x1FC8,0x4546,
+  0xE038,0x1FC8,0x3ABA,0x1FC8,0xE038,0x4546,0x3505,0x5587,
+  0xF574,0xBC11,0x78F4,0x4AFB,0xE6F3,0x4E12,0x3C11,0xF8F4,
+  0x4AFB,0x3C7A,0xF88B,0x3C11,0x78F4,0xCAFB,0x7FFF,0x08CC,
+  0x070C,0x236D,0x5587,0x236D,0x070C,0xF88B,0x3C7A,0x4AFB,
+  0xF8F4,0x3C11,0x7FFF,0x153C,0xCAFB,0x153C,0x7FFF,0x1E08,
+  0xE1F8,0x7FFF,0x08CC,0x7FFF,0xCAFB,0x78F4,0x3C11,0x4E12,
+  0xE6F3,0x4AFB,0x78F4,0xBC11,0xFE3D,0x7FFF,0xFE3D,0x2F3A,
+  0x7FFF,0x2F3A,0x89BC,0x7FFF,0x89BC
+};
+
+static const ogg_int16_t *const OC_EXT_ROWS[96]={
+  OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   0,
+  OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   6,
+  OC_EXT_COEFFS+  27,OC_EXT_COEFFS+  38,OC_EXT_COEFFS+  43,OC_EXT_COEFFS+  32,
+  OC_EXT_COEFFS+  49,OC_EXT_COEFFS+  58,OC_EXT_COEFFS+  67,OC_EXT_COEFFS+  71,
+  OC_EXT_COEFFS+  62,OC_EXT_COEFFS+  53,OC_EXT_COEFFS+  12,OC_EXT_COEFFS+  15,
+  OC_EXT_COEFFS+  14,OC_EXT_COEFFS+  13,OC_EXT_COEFFS+  76,OC_EXT_COEFFS+  81,
+  OC_EXT_COEFFS+  86,OC_EXT_COEFFS+  91,OC_EXT_COEFFS+  96,OC_EXT_COEFFS+  98,
+  OC_EXT_COEFFS+  93,OC_EXT_COEFFS+  88,OC_EXT_COEFFS+  83,OC_EXT_COEFFS+  78,
+  OC_EXT_COEFFS+  12,OC_EXT_COEFFS+  15,OC_EXT_COEFFS+  15,OC_EXT_COEFFS+  12,
+  OC_EXT_COEFFS+  12,OC_EXT_COEFFS+  15,OC_EXT_COEFFS+  12,OC_EXT_COEFFS+  15,
+  OC_EXT_COEFFS+  15,OC_EXT_COEFFS+  12,OC_EXT_COEFFS+ 103,OC_EXT_COEFFS+ 108,
+  OC_EXT_COEFFS+ 126,OC_EXT_COEFFS+  16,OC_EXT_COEFFS+ 137,OC_EXT_COEFFS+ 141,
+  OC_EXT_COEFFS+  20,OC_EXT_COEFFS+ 130,OC_EXT_COEFFS+ 113,OC_EXT_COEFFS+ 116,
+  OC_EXT_COEFFS+ 146,OC_EXT_COEFFS+ 153,OC_EXT_COEFFS+ 160,OC_EXT_COEFFS+ 167,
+  OC_EXT_COEFFS+ 170,OC_EXT_COEFFS+ 163,OC_EXT_COEFFS+ 156,OC_EXT_COEFFS+ 149,
+  OC_EXT_COEFFS+ 119,OC_EXT_COEFFS+ 122,OC_EXT_COEFFS+ 174,OC_EXT_COEFFS+ 177,
+  OC_EXT_COEFFS+ 182,OC_EXT_COEFFS+ 187,OC_EXT_COEFFS+ 192,OC_EXT_COEFFS+ 197,
+  OC_EXT_COEFFS+ 202,OC_EXT_COEFFS+ 207,OC_EXT_COEFFS+ 210,OC_EXT_COEFFS+ 215,
+  OC_EXT_COEFFS+ 179,OC_EXT_COEFFS+ 189,OC_EXT_COEFFS+  24,OC_EXT_COEFFS+ 204,
+  OC_EXT_COEFFS+ 184,OC_EXT_COEFFS+ 194,OC_EXT_COEFFS+ 212,OC_EXT_COEFFS+ 199,
+  OC_EXT_COEFFS+ 217,OC_EXT_COEFFS+ 100,OC_EXT_COEFFS+ 134,OC_EXT_COEFFS+ 135,
+  OC_EXT_COEFFS+ 135,OC_EXT_COEFFS+  12,OC_EXT_COEFFS+  15,OC_EXT_COEFFS+ 134,
+  OC_EXT_COEFFS+ 134,OC_EXT_COEFFS+ 135,OC_EXT_COEFFS+ 220,OC_EXT_COEFFS+ 223,
+  OC_EXT_COEFFS+ 226,OC_EXT_COEFFS+ 227,OC_EXT_COEFFS+ 224,OC_EXT_COEFFS+ 221
+};
+
+static const oc_extension_info OC_EXTENSION_INFO[OC_NSHAPES]={
+  {0x7F,7,OC_EXT_ROWS+  0,{0,1,2,3,4,5,6,7},{0,1,2,4,5,6,7,3}},
+  {0xFE,7,OC_EXT_ROWS+  7,{1,2,3,4,5,6,7,0},{0,1,2,4,5,6,7,3}},
+  {0x3F,6,OC_EXT_ROWS+  8,{0,1,2,3,4,5,7,6},{0,1,3,4,6,7,5,2}},
+  {0xFC,6,OC_EXT_ROWS+ 10,{2,3,4,5,6,7,1,0},{0,1,3,4,6,7,5,2}},
+  {0x1F,5,OC_EXT_ROWS+ 12,{0,1,2,3,4,7,6,5},{0,2,3,5,7,6,4,1}},
+  {0xF8,5,OC_EXT_ROWS+ 15,{3,4,5,6,7,2,1,0},{0,2,3,5,7,6,4,1}},
+  {0x0F,4,OC_EXT_ROWS+ 18,{0,1,2,3,7,6,5,4},{0,2,4,6,7,5,3,1}},
+  {0xF0,4,OC_EXT_ROWS+ 18,{4,5,6,7,3,2,1,0},{0,2,4,6,7,5,3,1}},
+  {0x07,3,OC_EXT_ROWS+ 22,{0,1,2,7,6,5,4,3},{0,3,6,7,5,4,2,1}},
+  {0xE0,3,OC_EXT_ROWS+ 27,{5,6,7,4,3,2,1,0},{0,3,6,7,5,4,2,1}},
+  {0x03,2,OC_EXT_ROWS+ 32,{0,1,7,6,5,4,3,2},{0,4,7,6,5,3,2,1}},
+  {0xC0,2,OC_EXT_ROWS+ 32,{6,7,5,4,3,2,1,0},{0,4,7,6,5,3,2,1}},
+  {0x01,1,OC_EXT_ROWS+  0,{0,7,6,5,4,3,2,1},{0,7,6,5,4,3,2,1}},
+  {0x80,1,OC_EXT_ROWS+  0,{7,6,5,4,3,2,1,0},{0,7,6,5,4,3,2,1}},
+  {0x7E,6,OC_EXT_ROWS+ 42,{1,2,3,4,5,6,7,0},{0,1,2,5,6,7,4,3}},
+  {0x7C,5,OC_EXT_ROWS+ 44,{2,3,4,5,6,7,1,0},{0,1,4,5,7,6,3,2}},
+  {0x3E,5,OC_EXT_ROWS+ 47,{1,2,3,4,5,7,6,0},{0,1,4,5,7,6,3,2}},
+  {0x78,4,OC_EXT_ROWS+ 50,{3,4,5,6,7,2,1,0},{0,4,5,7,6,3,2,1}},
+  {0x3C,4,OC_EXT_ROWS+ 54,{2,3,4,5,7,6,1,0},{0,3,4,7,6,5,2,1}},
+  {0x1E,4,OC_EXT_ROWS+ 58,{1,2,3,4,7,6,5,0},{0,4,5,7,6,3,2,1}},
+  {0x70,3,OC_EXT_ROWS+ 62,{4,5,6,7,3,2,1,0},{0,5,7,6,4,3,2,1}},
+  {0x38,3,OC_EXT_ROWS+ 67,{3,4,5,7,6,2,1,0},{0,5,6,7,4,3,2,1}},
+  {0x1C,3,OC_EXT_ROWS+ 72,{2,3,4,7,6,5,1,0},{0,5,6,7,4,3,2,1}},
+  {0x0E,3,OC_EXT_ROWS+ 77,{1,2,3,7,6,5,4,0},{0,5,7,6,4,3,2,1}},
+  {0x60,2,OC_EXT_ROWS+ 82,{5,6,7,4,3,2,1,0},{0,2,7,6,5,4,3,1}},
+  {0x30,2,OC_EXT_ROWS+ 36,{4,5,7,6,3,2,1,0},{0,4,7,6,5,3,2,1}},
+  {0x18,2,OC_EXT_ROWS+ 90,{3,4,7,6,5,2,1,0},{0,1,7,6,5,4,3,2}},
+  {0x0C,2,OC_EXT_ROWS+ 34,{2,3,7,6,5,4,1,0},{0,4,7,6,5,3,2,1}},
+  {0x06,2,OC_EXT_ROWS+ 84,{1,2,7,6,5,4,3,0},{0,2,7,6,5,4,3,1}},
+  {0x40,1,OC_EXT_ROWS+  0,{6,7,5,4,3,2,1,0},{0,7,6,5,4,3,2,1}},
+  {0x20,1,OC_EXT_ROWS+  0,{5,7,6,4,3,2,1,0},{0,7,6,5,4,3,2,1}},
+  {0x10,1,OC_EXT_ROWS+  0,{4,7,6,5,3,2,1,0},{0,7,6,5,4,3,2,1}},
+  {0x08,1,OC_EXT_ROWS+  0,{3,7,6,5,4,2,1,0},{0,7,6,5,4,3,2,1}},
+  {0x04,1,OC_EXT_ROWS+  0,{2,7,6,5,4,3,1,0},{0,7,6,5,4,3,2,1}},
+  {0x02,1,OC_EXT_ROWS+  0,{1,7,6,5,4,3,2,0},{0,7,6,5,4,3,2,1}}
+};
+
+
+
+/*Pads a single column of a partial block and then performs a forward Type-II
+   DCT on the result.
+  The input is scaled by a factor of 4 and biased appropriately for the current
+   fDCT implementation.
+  The output is scaled by an additional factor of 2 from the orthonormal
+   version of the transform.
+  _y: The buffer to store the result in.
+      Data will be placed in the first 8 entries (e.g., in a row of an 8x8 block).
+  _x: The input coefficients.
+      Every 8th entry is used (e.g., from a column of an 8x8 block).
+  _e: The extension information for the shape.*/
+/*Fills in the padding pixels of one column from its active pixels, then
+   transforms the column.
+  _y: Receives the 8 output coefficients.
+  _x: The input column, one sample every 8 entries; its padding entries are
+       overwritten in place.
+  _e: The extension information for the shape of the column.*/
+static void oc_fdct8_ext(ogg_int16_t _y[8],ogg_int16_t *_x,
+ const oc_extension_info *_e){
+  const unsigned char *pi;
+  int                  na;
+  na=_e->na;
+  pi=_e->pi;
+  if(na==1){
+    int ci;
+    /*While the branch below is still correct for shapes with na==1, we can
+       perform the entire transform with just 1 multiply in this case instead
+       of 23.
+      The input is strided by 8, just as in the general branch below, so the
+       single active sample lives at _x[pi[0]<<3].*/
+    _y[0]=(ogg_int16_t)(OC_DIV2_16(OC_C4S4*(_x[pi[0]<<3])));
+    for(ci=1;ci<8;ci++)_y[ci]=0;
+  }
+  else{
+    const ogg_int16_t *const *ext;
+    int                       zpi;
+    int                       api;
+    int                       nz;
+    /*First multiply by the extension matrix to compute the padding values.*/
+    nz=8-na;
+    ext=_e->ext;
+    for(zpi=0;zpi<nz;zpi++){
+      ogg_int32_t v;
+      v=0;
+      for(api=0;api<na;api++){
+        /*Doubling is done with a multiply: left-shifting a negative sample
+           would be undefined behavior.*/
+        v+=ext[zpi][api]*(ogg_int32_t)(_x[pi[api]<<3]*2);
+      }
+      /*Since >> binds looser than +, this halves the whole sum.*/
+      _x[pi[na+zpi]<<3]=(ogg_int16_t)(v+0x8000>>16)+1>>1;
+    }
+    oc_fdct8(_y,_x);
+  }
+}
+
+/*Performs a forward 8x8 Type-II DCT transform on blocks which overlap the
+   border of the picture region.
+  This method ONLY works with rectangular regions.
+  _border: A description of which pixels are inside the border.
+  _y:      The buffer to store the result in.
+           This may be the same as _x.
+  _x:      The input pixel values.
+           Pixel values outside the border will be ignored.*/
+void oc_fdct8x8_border(const oc_border_info *_border,
+ ogg_int16_t _y[64],const ogg_int16_t _x[64]){
+  ogg_int16_t             *in;
+  ogg_int16_t             *out;
+  ogg_int16_t              w[64];
+  ogg_int64_t              mask;
+  const oc_extension_info *cext;
+  const oc_extension_info *rext;
+  int                      cmask;
+  int                      rmask;
+  int                      ri;
+  int                      ci;
+  /*Identify the shapes of the non-zero rows and columns.*/
+  rmask=cmask=0;
+  mask=_border->mask;
+  /*The 64-bit mask is consumed 8 bits at a time: bit ri of cmask records
+     whether the ri-th group of 8 bits has any bit set, and rmask is the OR of
+     all of the groups.*/
+  for(ri=0;ri<8;ri++){
+    /*This aggregation is _only_ correct for rectangular masks.*/
+    cmask|=((mask&0xFF)!=0)<<ri;
+    rmask|=mask&0xFF;
+    mask>>=8;
+  }
+  /*Find the associated extension info for these shapes.*/
+  /*A full mask (0xFF) needs no padding, which is signalled by a NULL
+     extension pointer.*/
+  if(cmask==0xFF)cext=NULL;
+  else for(cext=OC_EXTENSION_INFO;cext->mask!=cmask;){
+    /*If we somehow can't find the shape, then just do an unpadded fDCT.
+      It won't be efficient, but it should still be correct.*/
+    if(++cext>=OC_EXTENSION_INFO+OC_NSHAPES){
+      oc_enc_fdct8x8_c(_y,_x);
+      return;
+    }
+  }
+  if(rmask==0xFF)rext=NULL;
+  else for(rext=OC_EXTENSION_INFO;rext->mask!=rmask;){
+    /*If we somehow can't find the shape, then just do an unpadded fDCT.
+      It won't be efficient, but it should still be correct.*/
+    if(++rext>=OC_EXTENSION_INFO+OC_NSHAPES){
+      oc_enc_fdct8x8_c(_y,_x);
+      return;
+    }
+  }
+  /*Add two extra bits of working precision to improve accuracy; any more and
+     we could overflow.*/
+  /*NOTE(review): this left-shifts values that may be negative, which is
+     undefined behavior in ISO C; a multiply by 4 would avoid that.*/
+  for(ci=0;ci<64;ci++)w[ci]=_x[ci]<<2;
+  /*These biases correct for some systematic error that remains in the full
+     fDCT->iDCT round trip.
+    We can safely add them before padding, since if these pixel values are
+     overwritten, we didn't care what they were anyway (and the unbiased values
+     will usually yield smaller DCT coefficient magnitudes).*/
+  w[0]+=(w[0]!=0)+1;
+  w[1]++;
+  w[8]--;
+  /*Transform the columns.
+    We can ignore zero columns without a problem.*/
+  in=w;
+  out=_y;
+  if(cext==NULL)for(ci=0;ci<8;ci++)oc_fdct8(out+(ci<<3),in+ci);
+  else for(ci=0;ci<8;ci++)if(rmask&(1<<ci))oc_fdct8_ext(out+(ci<<3),in+ci,cext);
+  /*Transform the rows.
+    We transform even rows that are supposedly zero, because rounding errors
+     may make them slightly non-zero, and this will give a more precise
+     reconstruction with very small quantizers.*/
+  in=_y;
+  out=w;
+  if(rext==NULL)for(ri=0;ri<8;ri++)oc_fdct8(out+(ri<<3),in+ri);
+  else for(ri=0;ri<8;ri++)oc_fdct8_ext(out+(ri<<3),in+ri,rext);
+  /*Round the result back to the external working precision (which is still
+     scaled by four relative to the orthogonal result).
+    TODO: We should just update the external working precision.*/
+  for(ci=0;ci<64;ci++)_y[ci]=w[ci]+2>>2;
+}
+#endif

+ 87 - 0
Engine/lib/libtheora/lib/fragment.c

@@ -0,0 +1,87 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: fragment.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+#include <string.h>
+#include "internal.h"
+
+/*Copies an 8x8 block using the implementation selected in the state's
+   optimization vtable.
+  _state:   The state that supplies the vtable.
+  _dst:     The destination block.
+  _src:     The source block.
+  _ystride: The offset between rows, shared by both blocks.*/
+void oc_frag_copy(const oc_theora_state *_state,unsigned char *_dst,
+ const unsigned char *_src,int _ystride){
+  _state->opt_vtable.frag_copy(_dst,_src,_ystride);
+}
+
+/*Plain C 8x8 block copy: copies 8 rows of 8 bytes each.
+  _dst:     The destination block.
+  _src:     The source block.
+  _ystride: The offset between rows, shared by both blocks.*/
+void oc_frag_copy_c(unsigned char *_dst,const unsigned char *_src,int _ystride){
+  int row;
+  for(row=0;row<8;row++){
+    memcpy(_dst+row*_ystride,_src+row*_ystride,8*sizeof(*_dst));
+  }
+}
+
+/*Reconstructs an intra-coded 8x8 block using the implementation selected in
+   the state's optimization vtable.
+  _state:   The state that supplies the vtable.
+  _dst:     The destination block.
+  _ystride: The offset between rows of _dst.
+  _residue: The 64 residual values.*/
+void oc_frag_recon_intra(const oc_theora_state *_state,unsigned char *_dst,
+ int _ystride,const ogg_int16_t _residue[64]){
+  (*_state->opt_vtable.frag_recon_intra)(_dst,_ystride,_residue);
+}
+
+/*Plain C intra reconstruction: each output pixel is the residual plus 128,
+   passed through OC_CLAMP255().
+  _dst:     The destination block.
+  _ystride: The offset between rows of _dst.
+  _residue: The 64 residual values, 8 per row.*/
+void oc_frag_recon_intra_c(unsigned char *_dst,int _ystride,
+ const ogg_int16_t _residue[64]){
+  int y;
+  for(y=0;y<8;y++){
+    const ogg_int16_t *row;
+    int                x;
+    row=_residue+y*8;
+    for(x=0;x<8;x++)_dst[x]=OC_CLAMP255(row[x]+128);
+    _dst+=_ystride;
+  }
+}
+
+/*Reconstructs an inter-coded 8x8 block from one predictor using the
+   implementation selected in the state's optimization vtable.
+  _state:   The state that supplies the vtable.
+  _dst:     The destination block.
+  _src:     The predictor block.
+  _ystride: The offset between rows, shared by both blocks.
+  _residue: The 64 residual values.*/
+void oc_frag_recon_inter(const oc_theora_state *_state,unsigned char *_dst,
+ const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){
+  (*_state->opt_vtable.frag_recon_inter)(_dst,_src,_ystride,_residue);
+}
+
+/*Plain C inter reconstruction: each output pixel is the residual plus the
+   predictor pixel, passed through OC_CLAMP255().
+  _dst:     The destination block.
+  _src:     The predictor block.
+  _ystride: The offset between rows, shared by both blocks.
+  _residue: The 64 residual values, 8 per row.*/
+void oc_frag_recon_inter_c(unsigned char *_dst,
+ const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){
+  int y;
+  for(y=0;y<8;y++){
+    const ogg_int16_t *row;
+    int                x;
+    row=_residue+y*8;
+    for(x=0;x<8;x++)_dst[x]=OC_CLAMP255(row[x]+_src[x]);
+    _dst+=_ystride;
+    _src+=_ystride;
+  }
+}
+
+/*Reconstructs an inter-coded 8x8 block from two predictors using the
+   implementation selected in the state's optimization vtable.
+  _state:   The state that supplies the vtable.
+  _dst:     The destination block.
+  _src1:    The first predictor block.
+  _src2:    The second predictor block.
+  _ystride: The offset between rows, shared by all three blocks.
+  _residue: The 64 residual values.*/
+void oc_frag_recon_inter2(const oc_theora_state *_state,unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride,
+ const ogg_int16_t _residue[64]){
+  (*_state->opt_vtable.frag_recon_inter2)(_dst,_src1,_src2,_ystride,_residue);
+}
+
+/*Plain C two-predictor inter reconstruction: each output pixel is the
+   residual plus the truncated average of the two predictor pixels, passed
+   through OC_CLAMP255().
+  _dst:     The destination block.
+  _src1:    The first predictor block.
+  _src2:    The second predictor block.
+  _ystride: The offset between rows, shared by all three blocks.
+  _residue: The 64 residual values, 8 per row.*/
+void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned char *_src1,
+ const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]){
+  int y;
+  for(y=0;y<8;y++){
+    const ogg_int16_t *row;
+    int                x;
+    row=_residue+y*8;
+    for(x=0;x<8;x++){
+      _dst[x]=OC_CLAMP255(row[x]+((_src1[x]+_src2[x])>>1));
+    }
+    _dst+=_ystride;
+    _src1+=_ystride;
+    _src2+=_ystride;
+  }
+}
+
+/*Calls the restore_fpu implementation selected in the state's optimization
+   vtable.
+  _state: The state that supplies the vtable.*/
+void oc_restore_fpu(const oc_theora_state *_state){
+  (*_state->opt_vtable.restore_fpu)();
+}
+
+/*Plain C implementation of restore_fpu: it does nothing.*/
+void oc_restore_fpu_c(void){
+}

+ 489 - 0
Engine/lib/libtheora/lib/huffdec.c

@@ -0,0 +1,489 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: huffdec.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <ogg/ogg.h>
+#include "huffdec.h"
+#include "decint.h"
+
+
+/*The ANSI offsetof macro is broken on some platforms (e.g., older DECs).*/
+#define _ogg_offsetof(_type,_field)\
+ ((size_t)((char *)&((_type *)0)->_field-(char *)0))
+
+/*The number of internal tokens associated with each of the spec tokens.*/
+static const unsigned char OC_DCT_TOKEN_MAP_ENTRIES[TH_NDCT_TOKENS]={
+  1,1,1,4,8,1,1,8,1,1,1,1,1,2,2,2,2,4,8,2,2,2,4,2,2,2,2,2,8,2,4,8
+};
+
+/*The map from external spec-defined tokens to internal tokens.
+  This is constructed so that any extra bits read with the original token value
+   can be masked off the least significant bits of its internal token index.
+  In addition, all of the tokens which require additional extra bits are placed
+   at the start of the list, and grouped by type.
+  OC_DCT_REPEAT_RUN3_TOKEN is placed first, as it is an extra-special case, so
+   giving it index 0 may simplify comparisons on some architectures.
+  These requirements require some substantial reordering.*/
+static const unsigned char OC_DCT_TOKEN_MAP[TH_NDCT_TOKENS]={
+  /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
+  15,
+  /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
+  16,
+  /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
+  17,
+  /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits)*/
+  88,
+  /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits)*/
+  80,
+  /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
+   1,
+  /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
+   0,
+  /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits)*/
+  48,
+  /*OC_DCT_ZRL_TOKEN (6 extra bits)*/
+  14,
+  /*OC_ONE_TOKEN (0 extra bits)*/
+  56,
+  /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
+  57,
+  /*OC_TWO_TOKEN (0 extra bits)*/
+  58,
+  /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
+  59,
+  /*OC_DCT_VAL_CAT2 (1 extra bit)*/
+  60,
+  62,
+  64,
+  66,
+  /*OC_DCT_VAL_CAT3 (2 extra bits)*/
+  68,
+  /*OC_DCT_VAL_CAT4 (3 extra bits)*/
+  72,
+  /*OC_DCT_VAL_CAT5 (4 extra bits)*/
+   2,
+  /*OC_DCT_VAL_CAT6 (5 extra bits)*/
+   4,
+  /*OC_DCT_VAL_CAT7 (6 extra bits)*/
+   6,
+  /*OC_DCT_VAL_CAT8 (10 extra bits)*/
+   8,
+  /*OC_DCT_RUN_CAT1A (1 extra bit)*/
+  18,
+  20,
+  22,
+  24,
+  26,
+  /*OC_DCT_RUN_CAT1B (3 extra bits)*/
+  32,
+  /*OC_DCT_RUN_CAT1C (4 extra bits)*/
+  12,
+  /*OC_DCT_RUN_CAT2A (2 extra bits)*/
+  28,
+  /*OC_DCT_RUN_CAT2B (3 extra bits)*/
+  40
+};
+
+/*These three functions are really part of the bitpack.c module, but
+   they are only used here.
+  Declaring local static versions so they can be inlined saves considerable
+   function call overhead.*/
+
+/*Tops up the bit window of _b from its byte buffer.
+  Whole bytes are ORed in below the bits already held for as long as at least
+   8 bit positions are free.
+  Once the read pointer reaches _b->stop the available count is replaced by
+   OC_LOTS_OF_BITS.
+  _b:    The buffer to refill.
+         Its ptr and bits fields are updated; its window field is NOT, so the
+          caller must store the returned value if it wants to keep it.
+  _bits: The number of bits the caller wants to have available.
+  Return: The refilled window.*/
+static oc_pb_window oc_pack_refill(oc_pack_buf *_b,int _bits){
+  const unsigned char *next;
+  const unsigned char *end;
+  oc_pb_window         win;
+  int                  nbits;
+  win=_b->window;
+  nbits=_b->bits;
+  next=_b->ptr;
+  end=_b->stop;
+  /*The eof flag is deliberately left alone by this version of _refill():
+     nothing checks it once decoding of DCT tokens has started.*/
+  if(next>=end)nbits=OC_LOTS_OF_BITS;
+  while(nbits<=OC_PB_WINDOW_SIZE-8){
+    oc_pb_window byte;
+    byte=(oc_pb_window)*next;
+    next++;
+    nbits+=8;
+    win|=byte<<(OC_PB_WINDOW_SIZE-nbits);
+    if(next>=end)nbits=OC_LOTS_OF_BITS;
+  }
+  _b->ptr=next;
+  /*Still short of the request: OR in the next byte, shifted right by
+     nbits&7, without advancing the read pointer.*/
+  if(_bits>nbits)win|=*next>>(nbits&7);
+  _b->bits=nbits;
+  return win;
+}
+
+
+/*Peeks at the next _bits bits of _b without advancing the bit pointer.
+  If the window holds fewer than _bits bits it is refilled first, and the
+   refilled window is stored back into _b.
+  Here we assume 0<=_bits&&_bits<=32.
+  _b:    The buffer to look into.
+  _bits: The number of bits to return.
+  Return: The requested bits in the low end of the result, or 0 if _bits is
+           0.*/
+static long oc_pack_look(oc_pack_buf *_b,int _bits){
+  oc_pb_window win;
+  /*Handled up front so that the shift below is never by the full window
+     width.*/
+  if(_bits==0)return 0;
+  win=_b->window;
+  if(_bits>_b->bits){
+    win=oc_pack_refill(_b,_bits);
+    _b->window=win;
+  }
+  return (long)(win>>(OC_PB_WINDOW_SIZE-_bits));
+}
+
+/*Advances the bit pointer by shifting _bits bits out of the top of the
+   window and reducing the available bit count by the same amount.
+  _b:    The buffer to advance.
+  _bits: The number of bits to consume.*/
+static void oc_pack_adv(oc_pack_buf *_b,int _bits){
+  /*The special cases for _bits==0 and _bits==32 are ignored here, since they
+     are never actually used.
+    OC_HUFF_SLUSH (defined below) would have to be at least 27 to actually read
+     32 bits in a single go, and would require a 32 GB lookup table (assuming
+     8 byte pointers, since 4 byte pointers couldn't fit such a table).*/
+  _b->bits-=_bits;
+  _b->window<<=_bits;
+}
+
+
+/*The log_2 of the size of a lookup table is allowed to grow relative to
+   the number of unique nodes it contains.
+  E.g., if OC_HUFF_SLUSH is 2, then at most 75% of the space in the tree is
+   wasted (each node will have an amortized cost of at most 20 bytes when using
+   4-byte pointers).
+  Larger numbers can decode tokens with fewer read operations, while smaller
+   numbers may save more space (requiring as little as 8 bytes amortized per
+   node, though there will be more nodes).
+  With a sample file:
+  32233473 read calls are required when no tree collapsing is done (100.0%).
+  19269269 read calls are required when OC_HUFF_SLUSH is 0 (59.8%).
+  11144969 read calls are required when OC_HUFF_SLUSH is 1 (34.6%).
+  10538563 read calls are required when OC_HUFF_SLUSH is 2 (32.7%).
+  10192578 read calls are required when OC_HUFF_SLUSH is 3 (31.6%).
+  Since a value of 1 gets us the vast majority of the speed-up with only a
+   small amount of wasted memory, this is what we use.*/
+#define OC_HUFF_SLUSH (1)
+
+
+/*Computes how many bytes a single Huffman tree node occupies.
+  Every node pays for the fields that precede the nodes[] array; an internal
+   node additionally pays for one child pointer per table entry.
+  _nbits: The depth of the subtree the node represents.
+          0 (or less) means a leaf node, which stores no child pointers.
+          Otherwise room for 1<<_nbits child pointers is included.
+  Return: The number of bytes required to store the node.*/
+static size_t oc_huff_node_size(int _nbits){
+  if(_nbits<=0)return _ogg_offsetof(oc_huff_node,nodes);
+  return _ogg_offsetof(oc_huff_node,nodes)+
+   sizeof(oc_huff_node *)*(1<<_nbits);
+}
+
+/*Carves a node out of the front of a storage buffer.
+  Only the nbits field of the new node is written here.
+  _storage: In/out cursor into the buffer.
+            It is advanced by _size bytes.
+  _size:    The number of bytes the node occupies.
+  _nbits:   The value to store in the node's nbits field.
+  Return: The new node, located where *_storage pointed on entry.*/
+static oc_huff_node *oc_huff_node_init(char **_storage,size_t _size,int _nbits){
+  oc_huff_node *node;
+  char         *base;
+  base=*_storage;
+  node=(oc_huff_node *)base;
+  node->nbits=(unsigned char)_nbits;
+  *_storage=base+_size;
+  return node;
+}
+
+
+/*Determines the size in bytes of a Huffman tree.
+  The size of the root node is added to the sizes of all of its distinct
+   sub-trees, computed recursively.
+  A child that fills several consecutive entries of the nodes[] table is
+   counted only once.
+  _node: The root of the tree to measure.
+  Return: The number of bytes required to store the tree.*/
+static size_t oc_huff_tree_size(const oc_huff_node *_node){
+  size_t total;
+  int    nbits;
+  int    i;
+  nbits=_node->nbits;
+  total=oc_huff_node_size(nbits);
+  if(nbits==0)return total;
+  i=0;
+  while(i<(1<<nbits)){
+    const oc_huff_node *child;
+    child=_node->nodes[i];
+    total+=oc_huff_tree_size(child);
+    /*A child of depth d fills 1<<(nbits-d) consecutive entries; skip past all
+       of them.*/
+    i+=1<<(nbits-child->depth);
+  }
+  return total;
+}
+
+
+/*Unpacks a sub-tree from the given buffer.
+  The sub-tree is read recursively: a 0 bit introduces an internal node, which
+   is followed by its two sub-trees, while a 1 bit introduces a leaf, which is
+   followed by an OC_NDCT_TOKEN_BITS-bit token value.
+  Each token value read is expanded into a complete binary sub-tree whose
+   OC_DCT_TOKEN_MAP_ENTRIES[] leaves hold consecutive internal tokens.
+  _opb:      The buffer to unpack from.
+  _binodes:  The nodes to store the sub-tree in.
+  _nbinodes: The number of nodes available for the sub-tree.
+  Return: The number of nodes of _binodes that were used on success, or a
+           negative value on error.
+          TH_EBADHEADER is returned if the buffer runs out of data or if
+           _nbinodes is too small.*/
+static int oc_huff_tree_unpack(oc_pack_buf *_opb,
+ oc_huff_node *_binodes,int _nbinodes){
+  oc_huff_node *binode;
+  long          bits;
+  int           nused;
+  if(_nbinodes<1)return TH_EBADHEADER;
+  binode=_binodes;
+  nused=0;
+  bits=oc_pack_read1(_opb);
+  if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
+  /*Read an internal node:
+    The node itself takes the first slot, its 0 sub-tree follows immediately,
+     and its 1 sub-tree is placed after however many nodes the 0 sub-tree
+     used.*/
+  if(!bits){
+    int ret;
+    nused++;
+    binode->nbits=1;
+    binode->depth=1;
+    binode->nodes[0]=_binodes+nused;
+    ret=oc_huff_tree_unpack(_opb,_binodes+nused,_nbinodes-nused);
+    if(ret>=0){
+      nused+=ret;
+      binode->nodes[1]=_binodes+nused;
+      ret=oc_huff_tree_unpack(_opb,_binodes+nused,_nbinodes-nused);
+    }
+    if(ret<0)return ret;
+    nused+=ret;
+  }
+  /*Read a leaf node:*/
+  else{
+    int ntokens;
+    int token;
+    int i;
+    bits=oc_pack_read(_opb,OC_NDCT_TOKEN_BITS);
+    if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
+    /*Find out how many internal tokens we translate this external token into.*/
+    ntokens=OC_DCT_TOKEN_MAP_ENTRIES[bits];
+    if(_nbinodes<2*ntokens-1)return TH_EBADHEADER;
+    /*Fill in a complete binary tree pointing to the internal tokens.
+      This proceeds level by level: the i nodes of one level are laid out
+       contiguously, and their children are the 2*i nodes that immediately
+       follow them.*/
+    for(i=1;i<ntokens;i<<=1){
+      int j;
+      binode=_binodes+nused;
+      nused+=i;
+      for(j=0;j<i;j++){
+        binode[j].nbits=1;
+        binode[j].depth=1;
+        binode[j].nodes[0]=_binodes+nused+2*j;
+        binode[j].nodes[1]=_binodes+nused+2*j+1;
+      }
+    }
+    /*And now the leaf nodes with those tokens.
+      The internal token values are consecutive, starting at
+       OC_DCT_TOKEN_MAP[bits].*/
+    token=OC_DCT_TOKEN_MAP[bits];
+    for(i=0;i<ntokens;i++){
+      binode=_binodes+nused++;
+      binode->nbits=0;
+      binode->depth=1;
+      binode->token=token+i;
+    }
+  }
+  return nused;
+}
+
+/*Finds the depth of the shortest branch of the given sub-tree.
+  The tree must be binary.
+  _binode: The root of the given sub-tree.
+           _binode->nbits must be 0 or 1.
+  Return: The smallest depth of a leaf node in this sub-tree.
+          0 indicates this sub-tree is a leaf node.*/
+static int oc_huff_tree_mindepth(oc_huff_node *_binode){
+  if(_binode->nbits!=0){
+    int left;
+    int right;
+    left=oc_huff_tree_mindepth(_binode->nodes[0]);
+    right=oc_huff_tree_mindepth(_binode->nodes[1]);
+    return 1+OC_MINI(left,right);
+  }
+  return 0;
+}
+
+/*Counts the internal nodes found at exactly the given depth, plus the leaves
+   found at that depth or shallower.
+  The tree must be binary.
+  _binode: The root of the given sub-tree.
+           _binode->nbits must be 0 or 1.
+  _depth:  The depth to count at, relative to _binode.
+  Return: The number of entries that would be contained in a jump table of the
+           given depth.*/
+static int oc_huff_tree_occupancy(oc_huff_node *_binode,int _depth){
+  int count;
+  int child;
+  /*A leaf, or any node at the target depth, accounts for a single entry.*/
+  if(_depth<=0||_binode->nbits==0)return 1;
+  count=0;
+  for(child=0;child<2;child++){
+    count+=oc_huff_tree_occupancy(_binode->nodes[child],_depth-1);
+  }
+  return count;
+}
+
+/*Makes a copy of the given Huffman tree.
+  The nodes of the copy are carved out of *_storage, parent before children,
+   using oc_huff_node_init(); no memory is allocated here.
+  A child whose depth is less than its parent's nbits fills
+   1<<nbits-depth consecutive entries of the parent's nodes[] table; such a
+   child is copied once and all of those entries point at the one copy.
+  _node:    The Huffman tree to copy.
+  _storage: In/out cursor into the buffer that receives the copy.
+            It is advanced past every node written.
+            oc_huff_trees_copy() sizes this buffer with oc_huff_tree_size().
+  Return: The copy of the Huffman tree.*/
+static oc_huff_node *oc_huff_tree_copy(const oc_huff_node *_node,
+ char **_storage){
+  oc_huff_node *ret;
+  ret=oc_huff_node_init(_storage,oc_huff_node_size(_node->nbits),_node->nbits);
+  ret->depth=_node->depth;
+  if(_node->nbits){
+    int nchildren;
+    int i;
+    int inext;
+    nchildren=1<<_node->nbits;
+    for(i=0;i<nchildren;){
+      ret->nodes[i]=oc_huff_tree_copy(_node->nodes[i],_storage);
+      inext=i+(1<<_node->nbits-ret->nodes[i]->depth);
+      while(++i<inext)ret->nodes[i]=ret->nodes[i-1];
+    }
+  }
+  else ret->token=_node->token;
+  return ret;
+}
+
+/*Computes the storage needed for the collapsed form of a binary sub-tree.
+  oc_huff_trees_unpack() uses the result to size the buffer it hands to
+   oc_huff_tree_collapse().
+  The table depth is chosen with the same occupancy/OC_HUFF_SLUSH rule that
+   oc_huff_tree_collapse() uses, but only sizes are accumulated; no nodes are
+   written.
+  _binode: The root of the binary sub-tree to measure.
+           _binode->nbits must be 0 or 1.
+  _depth:  The number of levels still to descend before a collapsed node
+            starts.
+           oc_huff_trees_unpack() passes 0 for the root.
+  Return: The number of bytes required.*/
+static size_t oc_huff_tree_collapse_size(oc_huff_node *_binode,int _depth){
+  size_t total;
+  int    levels;
+  int    prev;
+  /*Still above the level where a collapsed node begins: just descend.*/
+  if(_depth>0&&_binode->nbits!=0){
+    return oc_huff_tree_collapse_size(_binode->nodes[0],_depth-1)+
+     oc_huff_tree_collapse_size(_binode->nodes[1],_depth-1);
+  }
+  levels=oc_huff_tree_mindepth(_binode);
+  prev=1<<levels;
+  /*Keep deepening the table while doing so adds entries and the table stays
+     sufficiently full.*/
+  for(;;){
+    int cur;
+    levels++;
+    cur=oc_huff_tree_occupancy(_binode,levels);
+    if(cur<=prev||cur<(1<<OC_MAXI(levels-OC_HUFF_SLUSH,0)))break;
+    prev=cur;
+  }
+  /*The last depth tried was rejected; step back to the last accepted one.*/
+  levels--;
+  total=oc_huff_node_size(levels);
+  if(levels>0){
+    total+=oc_huff_tree_collapse_size(_binode->nodes[0],levels-1);
+    total+=oc_huff_tree_collapse_size(_binode->nodes[1],levels-1);
+  }
+  return total;
+}
+
+static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode,
+ char **_storage);
+
+/*Fills the given nodes table with all the children in the sub-tree at the
+   given depth.
+  Internal nodes above that depth are only traversed; they are not stored in
+   the table, and nothing is freed here.
+  The sub-tree must be binary.
+  A leaf reached before the table depth is stored once, and its pointer is
+   repeated across the 1<<_level entries it covers.
+  Every node that is stored has its depth field set to _depth-_level and is
+   then collapsed with oc_huff_tree_collapse().
+  _nodes:   The nodes table to fill.
+  _binode:  The root of the sub-tree to fill it with.
+            _binode->nbits must be 0 or 1.
+  _level:   The current level in the table.
+            0 indicates that the current node should be stored, regardless of
+             whether it is a leaf node or an internal node.
+  _depth:   The depth of the nodes to fill the table with, relative to their
+             parent.
+  _storage: Passed through to oc_huff_tree_collapse() for each stored node.*/
+static void oc_huff_node_fill(oc_huff_node **_nodes,
+ oc_huff_node *_binode,int _level,int _depth,char **_storage){
+  if(_level<=0||_binode->nbits==0){
+    int i;
+    _binode->depth=(unsigned char)(_depth-_level);
+    _nodes[0]=oc_huff_tree_collapse(_binode,_storage);
+    for(i=1;i<1<<_level;i++)_nodes[i]=_nodes[0];
+  }
+  else{
+    _level--;
+    oc_huff_node_fill(_nodes,_binode->nodes[0],_level,_depth,_storage);
+    _nodes+=1<<_level;
+    oc_huff_node_fill(_nodes,_binode->nodes[1],_level,_depth,_storage);
+  }
+}
+
+/*Finds the largest complete sub-tree rooted at the current node and collapses
+   it into a single node.
+  This procedure is then applied recursively to all the children of that node.
+  The table depth starts at the depth of the shallowest leaf and is increased
+   for as long as doing so adds entries and the occupancy stays at or above
+   1<<depth-OC_HUFF_SLUSH.
+  If the resulting depth is 1 or less, the sub-tree is simply duplicated with
+   oc_huff_tree_copy().
+  _binode:  The root of the sub-tree to collapse.
+            _binode->nbits must be 0 or 1.
+  _storage: In/out cursor into the buffer that receives the collapsed nodes.
+            It is advanced past every node written.
+  Return: The new root of the collapsed sub-tree.*/
+static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode,
+ char **_storage){
+  oc_huff_node *root;
+  size_t        size;
+  int           mindepth;
+  int           depth;
+  int           loccupancy;
+  int           occupancy;
+  depth=mindepth=oc_huff_tree_mindepth(_binode);
+  occupancy=1<<mindepth;
+  do{
+    loccupancy=occupancy;
+    occupancy=oc_huff_tree_occupancy(_binode,++depth);
+  }
+  while(occupancy>loccupancy&&occupancy>=1<<OC_MAXI(depth-OC_HUFF_SLUSH,0));
+  depth--;
+  if(depth<=1)return oc_huff_tree_copy(_binode,_storage);
+  size=oc_huff_node_size(depth);
+  root=oc_huff_node_init(_storage,size,depth);
+  root->depth=_binode->depth;
+  oc_huff_node_fill(root->nodes,_binode,depth,depth,_storage);
+  return root;
+}
+
+/*Unpacks a set of Huffman trees, and reduces them to a collapsed
+   representation.
+  Each tree is first read into a local temporary array of binary nodes, then
+   measured, and finally collapsed into a single block obtained from
+   _ogg_calloc().
+  _opb:   The buffer to unpack the trees from.
+  _nodes: The table to fill with the Huffman trees.
+  Return: 0 on success, or a negative value on error.
+          TH_EFAULT is returned if a block cannot be allocated.
+  Trees stored in _nodes before a failure are not released here.
+  NOTE(review): presumably the caller releases them with
+   oc_huff_trees_clear(), which would require the unfilled entries of _nodes
+   to be safe to free; confirm against the caller.*/
+int oc_huff_trees_unpack(oc_pack_buf *_opb,
+ oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){
+  int i;
+  for(i=0;i<TH_NHUFFMAN_TABLES;i++){
+    oc_huff_node  nodes[511];
+    char         *storage;
+    size_t        size;
+    int           ret;
+    /*Unpack the full tree into a temporary buffer.*/
+    ret=oc_huff_tree_unpack(_opb,nodes,sizeof(nodes)/sizeof(*nodes));
+    if(ret<0)return ret;
+    /*Figure out how big the collapsed tree will be.*/
+    size=oc_huff_tree_collapse_size(nodes,0);
+    storage=(char *)_ogg_calloc(1,size);
+    if(storage==NULL)return TH_EFAULT;
+    /*And collapse it.
+      storage is advanced as nodes are written; the root that is returned is
+       what gets stored in _nodes[i].*/
+    _nodes[i]=oc_huff_tree_collapse(nodes,&storage);
+  }
+  return 0;
+}
+
+/*Makes a copy of the given set of Huffman trees.
+  Each tree is copied into its own block obtained from _ogg_calloc().
+  _dst: The array to store the copy in.
+  _src: The array of trees to copy.
+  Return: 0 on success, or TH_EFAULT if a block could not be allocated.
+          In the latter case every copy made so far has been freed.*/
+int oc_huff_trees_copy(oc_huff_node *_dst[TH_NHUFFMAN_TABLES],
+ const oc_huff_node *const _src[TH_NHUFFMAN_TABLES]){
+  int ti;
+  for(ti=0;ti<TH_NHUFFMAN_TABLES;ti++){
+    char *cursor;
+    cursor=(char *)_ogg_calloc(1,oc_huff_tree_size(_src[ti]));
+    if(cursor!=NULL){
+      _dst[ti]=oc_huff_tree_copy(_src[ti],&cursor);
+      continue;
+    }
+    /*Out of memory: release the copies made so far, most recent first.*/
+    while(ti>0){
+      ti--;
+      _ogg_free(_dst[ti]);
+    }
+    return TH_EFAULT;
+  }
+  return 0;
+}
+
+/*Frees the memory used by a set of Huffman trees.
+  _ogg_free() is called once on each of the TH_NHUFFMAN_TABLES entries.
+  _nodes: The array of trees to free.*/
+void oc_huff_trees_clear(oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){
+  int ti;
+  ti=0;
+  while(ti<TH_NHUFFMAN_TABLES){
+    _ogg_free(_nodes[ti]);
+    ti++;
+  }
+}
+
+/*Unpacks a single token using the given Huffman tree.
+  At each internal node, nbits bits are peeked and used to index the node's
+   table; the bit pointer is then advanced by the depth of the child that was
+   selected, which may be fewer bits than were peeked.
+  _opb:  The buffer to unpack the token from.
+  _node: The tree to unpack the token with.
+  Return: The token value.*/
+int oc_huff_token_decode(oc_pack_buf *_opb,const oc_huff_node *_node){
+  for(;;){
+    long index;
+    if(_node->nbits==0)return _node->token;
+    index=oc_pack_look(_opb,_node->nbits);
+    _node=_node->nodes[index];
+    oc_pack_adv(_opb,_node->depth);
+  }
+}

+ 7 - 6
Engine/lib/libtheora/lib/dec/huffdec.h → Engine/lib/libtheora/lib/huffdec.h

@@ -5,19 +5,20 @@
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  *                                                                  *
  *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  *                                                                  *
  *                                                                  *
  ********************************************************************
  ********************************************************************
 
 
   function:
   function:
-    last mod: $Id: huffdec.h 15400 2008-10-15 12:10:58Z tterribe $
+    last mod: $Id: huffdec.h 16503 2009-08-22 18:14:02Z giles $
 
 
  ********************************************************************/
  ********************************************************************/
 
 
 #if !defined(_huffdec_H)
 #if !defined(_huffdec_H)
 # define _huffdec_H (1)
 # define _huffdec_H (1)
 # include "huffman.h"
 # include "huffman.h"
+# include "bitpack.h"
 
 
 
 
 
 
@@ -75,17 +76,17 @@ struct oc_huff_node{
     The ACTUAL size of this array is 1<<nbits, despite what the declaration
     The ACTUAL size of this array is 1<<nbits, despite what the declaration
      below claims.
      below claims.
     The exception is that for leaf nodes the size is 0.*/
     The exception is that for leaf nodes the size is 0.*/
-  oc_huff_node  *nodes[1];
+  oc_huff_node  *nodes[2];
 };
 };
 
 
 
 
 
 
-int oc_huff_trees_unpack(oggpack_buffer *_opb,
+int oc_huff_trees_unpack(oc_pack_buf *_opb,
  oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]);
  oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]);
-void oc_huff_trees_copy(oc_huff_node *_dst[TH_NHUFFMAN_TABLES],
+int oc_huff_trees_copy(oc_huff_node *_dst[TH_NHUFFMAN_TABLES],
  const oc_huff_node *const _src[TH_NHUFFMAN_TABLES]);
  const oc_huff_node *const _src[TH_NHUFFMAN_TABLES]);
 void oc_huff_trees_clear(oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]);
 void oc_huff_trees_clear(oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]);
-int oc_huff_token_decode(oggpack_buffer *_opb,const oc_huff_node *_node);
+int oc_huff_token_decode(oc_pack_buf *_opb,const oc_huff_node *_node);
 
 
 
 
 #endif
 #endif

+ 90 - 321
Engine/lib/libtheora/lib/enc/encapiwrapper.c → Engine/lib/libtheora/lib/huffenc.c

@@ -1,19 +1,11 @@
+#include <stdlib.h>
 #include <string.h>
 #include <string.h>
-#include "theora/theoraenc.h"
-#include "theora/theora.h"
-#include "codec_internal.h"
-#include "../dec/ocintrin.h"
+#include <ogg/ogg.h>
+#include "huffenc.h"
 
 
-/*Wrapper to translate the new API into the old API.
-  Eventually we need to convert the old functions to support the new API
-   natively and do the translation the other way.
-  theora-exp already the necessary code to do so.*/
 
 
 
 
-
-/*The default Huffman codes used for VP3.1.
-  It's kind of useless to include this, as TH_ENCCTL_SET_HUFFMAN_CODES is not
-   actually implemented in the old encoder, but it's part of the public API.*/
+/*The default Huffman codes used for VP3.1.*/
 const th_huff_code TH_VP31_HUFF_CODES[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]={
 const th_huff_code TH_VP31_HUFF_CODES[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]={
   {
   {
     {0x002D, 6},{0x0026, 7},{0x0166, 9},{0x004E, 8},
     {0x002D, 6},{0x0026, 7},{0x0166, 9},{0x004E, 8},
@@ -819,323 +811,100 @@ const th_huff_code TH_VP31_HUFF_CODES[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]={
 
 
 
 
 
 
-static void th_info2theora_info(theora_info *_ci,const th_info *_info){
-  _ci->version_major=_info->version_major;
-  _ci->version_minor=_info->version_minor;
-  _ci->version_subminor=_info->version_subminor;
-  _ci->width=_info->frame_width;
-  _ci->height=_info->frame_height;
-  _ci->frame_width=_info->pic_width;
-  _ci->frame_height=_info->pic_height;
-  _ci->offset_x=_info->pic_x;
-  _ci->offset_y=_info->pic_y;
-  _ci->fps_numerator=_info->fps_numerator;
-  _ci->fps_denominator=_info->fps_denominator;
-  _ci->aspect_numerator=_info->aspect_numerator;
-  _ci->aspect_denominator=_info->aspect_denominator;
-  switch(_info->colorspace){
-    case TH_CS_ITU_REC_470M:_ci->colorspace=OC_CS_ITU_REC_470M;break;
-    case TH_CS_ITU_REC_470BG:_ci->colorspace=OC_CS_ITU_REC_470BG;break;
-    default:_ci->colorspace=OC_CS_UNSPECIFIED;break;
-  }
-  switch(_info->pixel_fmt){
-    case TH_PF_420:_ci->pixelformat=OC_PF_420;break;
-    case TH_PF_422:_ci->pixelformat=OC_PF_422;break;
-    case TH_PF_444:_ci->pixelformat=OC_PF_444;break;
-    default:_ci->pixelformat=OC_PF_RSVD;
-  }
-  _ci->target_bitrate=_info->target_bitrate;
-  _ci->quality=_info->quality;
-  _ci->codec_setup=NULL;
-  /*Defaults from old encoder_example... eventually most of these should go
-     away when we make the encoder no longer use them.*/
-  _ci->dropframes_p=0;
-  _ci->keyframe_auto_p=1;
-  _ci->keyframe_frequency=1<<_info->keyframe_granule_shift;
-  _ci->keyframe_frequency_force=1<<_info->keyframe_granule_shift;
-  _ci->keyframe_data_target_bitrate=
-   _info->target_bitrate+(_info->target_bitrate>>1);
-  _ci->keyframe_auto_threshold=80;
-  _ci->keyframe_mindistance=8;
-  _ci->noise_sensitivity=1;
-  _ci->sharpness=0;
-  _ci->quick_p=1;
-}
-
-static int _ilog(unsigned _v){
-  int ret;
-  for(ret=0;_v;ret++)_v>>=1;
-  return ret;
-}
-
-
+/*A description of a Huffman code value used when encoding the tree.*/
+typedef struct{
+  /*The bit pattern, left-shifted so that the MSB of all patterns is
+     aligned.*/
+  ogg_uint32_t pattern;
+  /*The amount the bit pattern was shifted.*/
+  int          shift;
+  /*The token this bit pattern represents.*/
+  int          token;
+}oc_huff_entry;
 
 
-struct th_enc_ctx{
-  /*This is required at the start of the struct for the common functions to
-     work.*/
-  th_info        info;
-  /*The actual encoder.*/
-  theora_state   state;
-  /*A temporary buffer for input frames.
-    This is needed if the U and V strides differ, or padding is required.*/
-  unsigned char *buf;
-};
 
 
 
 
-th_enc_ctx *th_encode_alloc(const th_info *_info){
-  theora_info  ci;
-  th_enc_ctx  *enc;
-  th_info2theora_info(&ci,_info);
-  /*Do a bunch of checks the new API does, but the old one didn't.*/
-  if((_info->frame_width&0xF)||(_info->frame_height&0xF)||
-   _info->frame_width>=0x100000||_info->frame_height>=0x100000||
-   _info->pic_x+_info->pic_width>_info->frame_width||
-   _info->pic_y+_info->pic_height>_info->frame_height||
-   _info->pic_x>255||
-   _info->frame_height-_info->pic_height-_info->pic_y>255||
-   _info->colorspace<0||_info->colorspace>=TH_CS_NSPACES||
-   _info->pixel_fmt<0||_info->pixel_fmt>=TH_PF_NFORMATS){
-    enc=NULL;
-  }
-  else{
-    enc=(th_enc_ctx *)_ogg_malloc(sizeof(*enc));
-    if(theora_encode_init(&enc->state,&ci)<0){
-      _ogg_free(enc);
-      enc=NULL;
-    }
-    else{
-      if(_info->frame_width>_info->pic_width||
-       _info->frame_height>_info->pic_height){
-        enc->buf=_ogg_malloc((_info->frame_width*_info->frame_height+
-         ((_info->frame_width>>!(_info->pixel_fmt&1))*
-         (_info->frame_height>>!(_info->pixel_fmt&2))<<1))*sizeof(*enc->buf));
-      }
-      else enc->buf=NULL;
-      memcpy(&enc->info,_info,sizeof(enc->info));
-      /*Overwrite values theora_encode_init() can change; don't trust the user.*/
-      enc->info.version_major=ci.version_major;
-      enc->info.version_minor=ci.version_minor;
-      enc->info.version_subminor=ci.version_subminor;
-      enc->info.quality=ci.quality;
-      enc->info.target_bitrate=ci.target_bitrate;
-      enc->info.fps_numerator=ci.fps_numerator;
-      enc->info.fps_denominator=ci.fps_denominator;
-      enc->info.keyframe_granule_shift=_ilog(ci.keyframe_frequency_force-1);
-    }
-  }
-  return enc;
-}
-
-int th_encode_ctl(th_enc_ctx *_enc,int _req,void *_buf,size_t _buf_sz){
-  return theora_control(&_enc->state,_req,_buf,_buf_sz);
-}
-
-int th_encode_flushheader(th_enc_ctx *_enc,th_comment *_comments,
- ogg_packet *_op){
-  theora_state *te;
-  CP_INSTANCE  *cpi;
-  if(_enc==NULL||_op==NULL)return OC_FAULT;
-  te=&_enc->state;
-  cpi=(CP_INSTANCE *)te->internal_encode;
-  switch(cpi->doneflag){
-    case -3:{
-      theora_encode_header(te,_op);
-      return -cpi->doneflag++;
-    }break;
-    case -2:{
-      if(_comments==NULL)return OC_FAULT;
-      theora_encode_comment((theora_comment *)_comments,_op);
-      /*The old API does not require a theora_state struct when writing the
-         comment header, so it can't use its internal buffer and relies on the
-         application to free it.
-        The old documentation is wrong on this subject, and this breaks on
-         Windows when linking against multiple versions of libc (which is
-         almost always done when, e.g., using DLLs built with mingw32).
-        The new API _does_ require a th_enc_ctx, and states that libtheora owns
-         the memory.
-        Thus we move the contents of this packet into our internal
-         oggpack_buffer so it can be properly reclaimed.*/
-      oggpackB_reset(cpi->oggbuffer);
-      oggpackB_writecopy(cpi->oggbuffer,_op->packet,_op->bytes*8);
-      _ogg_free(_op->packet);
-      _op->packet=oggpackB_get_buffer(cpi->oggbuffer);
-      return -cpi->doneflag++;
-    }break;
-    case -1:{
-      theora_encode_tables(te,_op);
-      return -cpi->doneflag++;
-    }break;
-    case 0:return 0;
-    default:return OC_EINVAL;
-  }
+/*Compares two oc_huff_entry structures by their bit patterns.
+  _c1: The first entry to compare.
+  _c2: The second entry to compare.
+  Return: <0 if _c1<_c2, >0 if _c1>_c2, or 0 if the two patterns are equal.*/
+static int huff_entry_cmp(const void *_c1,const void *_c2){
+  ogg_uint32_t b1;
+  ogg_uint32_t b2;
+  b1=((const oc_huff_entry *)_c1)->pattern;
+  b2=((const oc_huff_entry *)_c2)->pattern;
+  return b1<b2?-1:b1>b2?1:0;
 }
 }
 
 
-/*Copies the picture region of the _src image plane into _dst and pads the rest
-   of _dst using a diffusion extension method.
-  We could do much better (e.g., the DCT-based low frequency extension method
-   in theora-exp's fdct.c) if we were to pad after motion compensation, but
-   that would require significant changes to the encoder.*/
-static unsigned char *th_encode_copy_pad_plane(th_img_plane *_dst,
- unsigned char *_buf,th_img_plane *_src,
- ogg_uint32_t _pic_x,ogg_uint32_t _pic_y,
- ogg_uint32_t _pic_width,ogg_uint32_t _pic_height){
-  size_t buf_sz;
-  _dst->width=_src->width;
-  _dst->height=_src->height;
-  _dst->stride=_src->width;
-  _dst->data=_buf;
-  buf_sz=_dst->width*_dst->height*sizeof(*_dst->data);
-  /*If we have _no_ data, just encode a dull green.*/
-  if(_pic_width==0||_pic_height==0)memset(_dst->data,0,buf_sz);
-  else{
-    unsigned char *dst;
-    unsigned char *src;
-    ogg_uint32_t   x;
-    ogg_uint32_t   y;
-    int            dstride;
-    int            sstride;
-    /*Step 1: Copy the data we do have.*/
-    dstride=_dst->stride;
-    sstride=_src->stride;
-    dst=_dst->data+_pic_y*dstride+_pic_x;
-    src=_src->data+_pic_y*sstride+_pic_x;
-    for(y=0;y<_pic_height;y++){
-      memcpy(dst,src,_pic_width);
-      dst+=dstride;
-      src+=sstride;
+/*Encodes a description of the given Huffman tables.
+  Although the codes are stored in the encoder as flat arrays, in the bit
+   stream and in the decoder they are structured as a tree.
+  This function recovers the tree structure from the flat array and then
+   writes it out.
+  Note that the codes MUST form a Huffman code, and not merely a prefix-free
+   code, since the binary tree is assumed to be full.
+  _opb:   The buffer to store the tree in.
+  _codes: The Huffman tables to pack.
+  Return: 0 on success, or a negative value if one of the given Huffman tables
+   does not form a full, prefix-free code.*/
+int oc_huff_codes_pack(oggpack_buffer *_opb,
+ const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]){
+  int i;
+  for(i=0;i<TH_NHUFFMAN_TABLES;i++){
+    oc_huff_entry entries[TH_NDCT_TOKENS];
+    int           bpos;
+    int           maxlen;
+    int           mask;
+    int           j;
+    /*First, find the maximum code length so we can align all the bit
+       patterns.*/
+    maxlen=_codes[i][0].nbits;
+    for(j=1;j<TH_NDCT_TOKENS;j++){
+      maxlen=OC_MAXI(_codes[i][j].nbits,maxlen);
     }
     }
-    /*Step 2: Copy the border into any blocks that are 100% padding.
-      There's probably smarter things we could do than this.*/
-    /*Left side.*/
-    for(x=_pic_x;x-->0;){
-      dst=_dst->data+_pic_y*dstride+x;
-      for(y=0;y<_pic_height;y++){
-        dst[0]=(dst[1]<<1)+(dst-(dstride&-(y>0)))[1]+
-         (dst+(dstride&-(y+1<_pic_height)))[1]+2>>2;
-        dst+=dstride;
-      }
-    }
-    /*Right side.*/
-    for(x=_pic_x+_pic_width;x<_dst->width;x++){
-      dst=_dst->data+_pic_y*dstride+x-1;
-      for(y=0;y<_pic_height;y++){
-        dst[1]=(dst[0]<<1)+(dst-(dstride&-(y>0)))[0]+
-         (dst+(dstride&-(y+1<_pic_height)))[0]+2>>2;
-        dst+=dstride;
-      }
-    }
-    /*Top.*/
-    dst=_dst->data+_pic_y*dstride;
-    for(y=_pic_y;y-->0;){
-      for(x=0;x<_dst->width;x++){
-        (dst-dstride)[x]=(dst[x]<<1)+dst[x-(x>0)]+dst[x+(x+1<_dst->width)]+2>>2;
-      }
-      dst-=dstride;
+    mask=(1<<(maxlen>>1)<<(maxlen+1>>1))-1;
+    /*Copy over the codes into our temporary workspace.
+      The bit patterns are aligned, and the original entry each code is from
+       is stored as well.*/
+    for(j=0;j<TH_NDCT_TOKENS;j++){
+      entries[j].shift=maxlen-_codes[i][j].nbits;
+      entries[j].pattern=_codes[i][j].pattern<<entries[j].shift&mask;
+      entries[j].token=j;
     }
     }
-    /*Bottom.*/
-    dst=_dst->data+(_pic_y+_pic_height)*dstride;
-    for(y=_pic_y+_pic_height;y<_dst->height;y++){
-      for(x=0;x<_dst->width;x++){
-        dst[x]=((dst-dstride)[x]<<1)+(dst-dstride)[x-(x>0)]+
-         (dst-dstride)[x+(x+1<_dst->width)]+2>>2;
+    /*Sort the codes into ascending order.
+      This is the order the leaves of the tree will be traversed.*/
+    qsort(entries,TH_NDCT_TOKENS,sizeof(entries[0]),huff_entry_cmp);
+    /*For each leaf of the tree:*/
+    bpos=maxlen;
+    for(j=0;j<TH_NDCT_TOKENS;j++){
+      int bit;
+      /*If this code has any bits at all.*/
+      if(entries[j].shift<maxlen){
+        /*Descend into the tree, writing a bit for each branch.*/
+        for(;bpos>entries[j].shift;bpos--)oggpackB_write(_opb,0,1);
+        /*Mark this as a leaf node, and write its value.*/
+        oggpackB_write(_opb,1,1);
+        oggpackB_write(_opb,entries[j].token,5);
+        /*For each 1 branch we've descended, back up the tree until we reach a
+           0 branch.*/
+        bit=1<<bpos;
+        for(;entries[j].pattern&bit;bpos++)bit<<=1;
+        /*Validate the code.*/
+        if(j+1<TH_NDCT_TOKENS){
+          mask=~(bit-1)<<1;
+          /*The next entry should have a 1 bit where we had a 0, and should
+             match our code above that bit.
+            This verifies both fullness and prefix-freeness simultaneously.*/
+          if(!(entries[j+1].pattern&bit)||
+           (entries[j].pattern&mask)!=(entries[j+1].pattern&mask)){
+            return TH_EINVAL;
+          }
+        }
+        /*If there are no more codes, we should have ascended back to the top
+           of the tree.*/
+        else if(bpos<maxlen)return TH_EINVAL;
       }
       }
-      dst+=dstride;
     }
     }
   }
   }
-  _buf+=buf_sz;
-  return _buf;
-}
-
-int th_encode_ycbcr_in(th_enc_ctx *_enc,th_ycbcr_buffer _ycbcr){
-  CP_INSTANCE     *cpi;
-  theora_state    *te;
-  th_img_plane    *pycbcr;
-  th_ycbcr_buffer  ycbcr;
-  yuv_buffer       yuv;
-  ogg_uint32_t     pic_width;
-  ogg_uint32_t     pic_height;
-  int              hdec;
-  int              vdec;
-  int              ret;
-  if(_enc==NULL||_ycbcr==NULL)return OC_FAULT;
-  te=&_enc->state;
-  /*theora_encode_YUVin() does not bother to check uv_width and uv_height, and
-     then uses them.
-    This is arguably okay (it will most likely lead to a crash if they're
-     wrong, which will make the developer who passed them fix the problem), but
-     our API promises to return an error code instead.*/
-  cpi=(CP_INSTANCE *)te->internal_encode;
-  hdec=!(cpi->pb.info.pixelformat&1);
-  vdec=!(cpi->pb.info.pixelformat&2);
-  if(_ycbcr[0].width!=cpi->pb.info.width||
-   _ycbcr[0].height!=cpi->pb.info.height||
-   _ycbcr[1].width!=_ycbcr[0].width>>hdec||
-   _ycbcr[1].height!=_ycbcr[0].height>>vdec||
-   _ycbcr[2].width!=_ycbcr[1].width||_ycbcr[2].height!=_ycbcr[1].height){
-    return OC_EINVAL;
-  }
-  pic_width=cpi->pb.info.frame_width;
-  pic_height=cpi->pb.info.frame_height;
-  /*We can only directly use the input buffer if no padding is required (since
-     the new API is documented not to use values outside the picture region)
-     and if the strides for the Cb and Cr planes are the same, since the old
-     API had no way to specify different ones.*/
-  if(_ycbcr[0].width==pic_width&&_ycbcr[0].height==pic_height&&
-   _ycbcr[1].stride==_ycbcr[2].stride){
-    pycbcr=_ycbcr;
-  }
-  else{
-    unsigned char *buf;
-    int            pic_x;
-    int            pic_y;
-    int            pli;
-    pic_x=cpi->pb.info.offset_x;
-    pic_y=cpi->pb.info.offset_y;
-    if(_ycbcr[0].width>pic_width||_ycbcr[0].height>pic_height){
-      buf=th_encode_copy_pad_plane(ycbcr+0,_enc->buf,_ycbcr+0,
-       pic_x,pic_y,pic_width,pic_height);
-    }
-    else{
-      /*If only the strides differ, we can still avoid copying the luma plane.*/
-      memcpy(ycbcr+0,_ycbcr+0,sizeof(ycbcr[0]));
-      if(_enc->buf==NULL){
-        _enc->buf=(unsigned char *)_ogg_malloc(
-         (_ycbcr[1].width*_ycbcr[1].height<<1)*sizeof(*_enc->buf));
-      }
-      buf=_enc->buf;
-    }
-    for(pli=1;pli<3;pli++){
-      int x0;
-      int y0;
-      x0=pic_x>>hdec;
-      y0=pic_y>>vdec;
-      buf=th_encode_copy_pad_plane(ycbcr+pli,buf,_ycbcr+pli,
-       x0,y0,(pic_x+pic_width+hdec>>hdec)-x0,(pic_y+pic_height+vdec>>vdec)-y0);
-    }
-    pycbcr=ycbcr;
-  }
-  yuv.y_width=pycbcr[0].width;
-  yuv.y_height=pycbcr[0].height;
-  yuv.uv_width=pycbcr[1].width;
-  yuv.uv_height=pycbcr[1].height;
-  yuv.y_stride=pycbcr[0].stride;
-  yuv.y=pycbcr[0].data;
-  yuv.uv_stride=pycbcr[1].stride;
-  yuv.u=pycbcr[1].data;
-  yuv.v=pycbcr[2].data;
-  ret=theora_encode_YUVin(te,&yuv);
-  return ret;
-}
-
-int th_encode_packetout(th_enc_ctx *_enc,int _last,ogg_packet *_op){
-  if(_enc==NULL)return OC_FAULT;
-  return theora_encode_packetout(&_enc->state,_last,_op);
-}
-
-void th_encode_free(th_enc_ctx *_enc){
-  if(_enc!=NULL){
-    theora_clear(&_enc->state);
-    _ogg_free(_enc->buf);
-    _ogg_free(_enc);
-  }
+  return 0;
 }
 }

+ 19 - 0
Engine/lib/libtheora/lib/huffenc.h

@@ -0,0 +1,19 @@
+#if !defined(_huffenc_H)
+# define _huffenc_H (1)
+# include "huffman.h"
+
+
+
+typedef th_huff_code                  th_huff_table[TH_NDCT_TOKENS];
+
+
+
+extern const th_huff_code
+ TH_VP31_HUFF_CODES[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS];
+
+
+
+int oc_huff_codes_pack(oggpack_buffer *_opb,
+ const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]);
+
+#endif

+ 3 - 3
Engine/lib/libtheora/lib/dec/huffman.h → Engine/lib/libtheora/lib/huffman.h

@@ -5,13 +5,13 @@
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
  *                                                                  *
  *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  *                                                                  *
  *                                                                  *
  ********************************************************************
  ********************************************************************
 
 
   function:
   function:
-    last mod: $Id: huffman.h 15400 2008-10-15 12:10:58Z tterribe $
+    last mod: $Id: huffman.h 16503 2009-08-22 18:14:02Z giles $
 
 
  ********************************************************************/
  ********************************************************************/
 
 
@@ -65,6 +65,6 @@
 #define OC_NDCT_RUN_MAX          (32)
 #define OC_NDCT_RUN_MAX          (32)
 #define OC_NDCT_RUN_CAT1A_MAX    (28)
 #define OC_NDCT_RUN_CAT1A_MAX    (28)
 
 
-extern const int OC_DCT_TOKEN_EXTRA_BITS[TH_NDCT_TOKENS];
+extern const unsigned char OC_DCT_TOKEN_EXTRA_BITS[TH_NDCT_TOKENS];
 
 
 #endif
 #endif

Algunos archivos no se mostraron porque demasiados archivos cambiaron en este cambio