12 years ago · 6cbd75f5fb
--- a/headers/zglHeader.h
+++ b/headers/zglHeader.h
@@ -3,7 +3,7 @@
 
				 /*--------------------------------*/
			
 
				 /*                                */
			
 
				 /* version:  0.3.6                */
			
 
				-/* date:     2012.12.16           */
			
 
				+/* date:     2012.12.21           */
			
 
				 /* license:  zlib                 */
			
 
				 /* homepage: http://zengl.org     */
			
 
				 /*                                */
			
--- a/headers/zglHeader.pas
+++ b/headers/zglHeader.pas
@@ -3,7 +3,7 @@
 
				 {--------------------------------}
			
 
				 {                                }
			
 
				 { version:  0.3.6                }
			
 
				-{ date:     2012.12.16           }
			
 
				+{ date:     2012.12.21           }
			
 
				 { license:  zlib                 }
			
 
				 { homepage: http://zengl.org     }
			
 
				 {                                }
			
--- a/lib/theora/delphi/yuv422rgb8888.obj
+++ b/lib/theora/delphi/yuv422rgb8888.obj
--- a/lib/theora/delphi/yuv444rgb8888.obj
+++ b/lib/theora/delphi/yuv444rgb8888.obj
--- a/lib/theora/yuv2rgba/Makefile
+++ b/lib/theora/yuv2rgba/Makefile
@@ -5,30 +5,46 @@ ANDROID_GCC=$(ANDROID_NDK)/toolchains/arm-linux-androideabi-4.4.3/prebuilt/linux
 
				 linux:
			
 
				 	gcc yuv2bgr16tab.c -c -o../i386-linux/yuv2bgr16tab.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
			
 
				 	gcc yuv420rgb8888.c -c -o../i386-linux/yuv420rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
			
 
				+	gcc yuv422rgb8888.c -c -o../i386-linux/yuv422rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
			
 
				+	gcc yuv444rgb8888.c -c -o../i386-linux/yuv444rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
			
 
				 	gcc yuv2bgr16tab.c -c -o../x86_64-linux/yuv2bgr16tab.o -O2 -s -ffast-math -std=c99 -m64 -fPIC
			
 
				 	gcc yuv420rgb8888.c -c -o../x86_64-linux/yuv420rgb8888.o -O2 -s -ffast-math -std=c99 -m64 -fPIC
			
 
				+	gcc yuv422rgb8888.c -c -o../x86_64-linux/yuv422rgb8888.o -O2 -s -ffast-math -std=c99 -m64 -fPIC
			
 
				+	gcc yuv444rgb8888.c -c -o../x86_64-linux/yuv444rgb8888.o -O2 -s -ffast-math -std=c99 -m64 -fPIC
			
 
				 
			
 
				 android:
			
 
				 	$(ANDROID_GCC) yuv2bgr16tab.c -c -o../arm-linux/yuv2bgr16tab.o -O2 -s -marm -march=armv5 -Xassembler "-meabi=5" -ffast-math -std=c99 -I$(ANDROID_NDK)/platforms/android-3/arch-arm/usr/include
			
 
				 	$(ANDROID_GCC) yuv420rgb8888.s -c -o../arm-linux/yuv420rgb8888.o -O2 -s -marm -march=armv5 -Xassembler "-meabi=5" -ffast-math -std=c99 -I$(ANDROID_NDK)/platforms/android-3/arch-arm/usr/include
			
 
				+	$(ANDROID_GCC) yuv422rgb8888.s -c -o../arm-linux/yuv422rgb8888.o -O2 -s -marm -march=armv5 -Xassembler "-meabi=5" -ffast-math -std=c99 -I$(ANDROID_NDK)/platforms/android-3/arch-arm/usr/include
			
 
				+	$(ANDROID_GCC) yuv444rgb8888.s -c -o../arm-linux/yuv444rgb8888.o -O2 -s -marm -march=armv5 -Xassembler "-meabi=5" -ffast-math -std=c99 -I$(ANDROID_NDK)/platforms/android-3/arch-arm/usr/include
			
 
				 
			
 
				 win32:
			
 
				 	i486-mingw32-gcc yuv2bgr16tab.c -c -o../i386-win32/yuv2bgr16tab.o -O2 -s -ffast-math -std=c99 -march=i686
			
 
				 	i486-mingw32-gcc yuv420rgb8888.c -c -o../i386-win32/yuv420rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686
			
 
				+	i486-mingw32-gcc yuv422rgb8888.c -c -o../i386-win32/yuv422rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686
			
 
				+	i486-mingw32-gcc yuv444rgb8888.c -c -o../i386-win32/yuv444rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686
			
 
				 
			
 
				 win64:
			
 
				 	x86_64-w64-mingw32-gcc yuv2bgr16tab.c -c -o../x86_64-win64/yuv2bgr16tab.o -O2 -s -ffast-math -std=c99
			
 
				 	x86_64-w64-mingw32-gcc yuv420rgb8888.c -c -o../x86_64-win64/yuv420rgb8888.o -O2 -s -ffast-math -std=c99
			
 
				+	x86_64-w64-mingw32-gcc yuv422rgb8888.c -c -o../x86_64-win64/yuv422rgb8888.o -O2 -s -ffast-math -std=c99
			
 
				+	x86_64-w64-mingw32-gcc yuv444rgb8888.c -c -o../x86_64-win64/yuv444rgb8888.o -O2 -s -ffast-math -std=c99
			
 
				 
			
 
				 wince:
			
 
				 	arm-wince-cegcc-gcc yuv2bgr16tab.c -c -o../arm-wince/yuv2bgr16tab.o -I./ -O2 -s -ffast-math
			
 
				 	arm-wince-cegcc-gcc yuv420rgb8888.c -c -o../arm-wince/yuv420rgb8888.o -I./ -O2 -s -ffast-math
			
 
				+	arm-wince-cegcc-gcc yuv422rgb8888.c -c -o../arm-wince/yuv422rgb8888.o -I./ -O2 -s -ffast-math
			
 
				+	arm-wince-cegcc-gcc yuv444rgb8888.c -c -o../arm-wince/yuv444rgb8888.o -I./ -O2 -s -ffast-math
			
 
				 
			
 
				 macos:
			
 
				 	gcc yuv2bgr16tab.c -c -o../i386-darwin/yuv2bgr16tab.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
			
 
				 	gcc yuv420rgb8888.c -c -o../i386-darwin/yuv420rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
			
 
				+	gcc yuv422rgb8888.c -c -o../i386-darwin/yuv422rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
			
 
				+	gcc yuv444rgb8888.c -c -o../i386-darwin/yuv444rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
			
 
				 	powerpc-apple-darwin10-gcc-4.2.1 yuv2bgr16tab.c -c -o../powerpc-darwin/yuv2bgr16tab.o -O2 -s -ffast-math -std=c99
			
 
				 	powerpc-apple-darwin10-gcc-4.2.1 yuv420rgb8888.c -c -o../powerpc-darwin/yuv420rgb8888.o -O2 -s -ffast-math -std=c99
			
 
				+	powerpc-apple-darwin10-gcc-4.2.1 yuv422rgb8888.c -c -o../powerpc-darwin/yuv422rgb8888.o -O2 -s -ffast-math -std=c99
			
 
				+	powerpc-apple-darwin10-gcc-4.2.1 yuv444rgb8888.c -c -o../powerpc-darwin/yuv444rgb8888.o -O2 -s -ffast-math -std=c99
			
 
				 
			
 
				 ios:
			
 
				 	$(iOS_GCC) -isysroot$(iOS_SYSROOT) -arch armv6 yuv2bgr16tab.c -c -o../arm-darwin/yuv2bgr16tab_armv6.o -O2 -s -ffast-math -std=c99
			
@@ -41,5 +57,17 @@ ios:
 
				 	lipo -create ../arm-darwin/yuv420rgb8888_armv6.o ../arm-darwin/yuv420rgb8888_armv7.o -output ../arm-darwin/yuv420rgb8888.o
			
 
				 	rm -f ../arm-darwin/yuv420rgb8888_armv6.o
			
 
				 	rm -f ../arm-darwin/yuv420rgb8888_armv7.o
			
 
				+	$(iOS_GCC) -isysroot$(iOS_SYSROOT) -arch armv6 yuv422rgb8888.c -c -o../arm-darwin/yuv422rgb8888_armv6.o -O2 -s -ffast-math -std=c99
			
 
				+	$(iOS_GCC) -isysroot$(iOS_SYSROOT) -arch armv7 yuv422rgb8888.c -c -o../arm-darwin/yuv422rgb8888_armv7.o -O2 -s -ffast-math -std=c99
			
 
				+	lipo -create ../arm-darwin/yuv422rgb8888_armv6.o ../arm-darwin/yuv422rgb8888_armv7.o -output ../arm-darwin/yuv422rgb8888.o
			
 
				+	rm -f ../arm-darwin/yuv422rgb8888_armv6.o
			
 
				+	rm -f ../arm-darwin/yuv422rgb8888_armv7.o
			
 
				+	$(iOS_GCC) -isysroot$(iOS_SYSROOT) -arch armv6 yuv444rgb8888.c -c -o../arm-darwin/yuv444rgb8888_armv6.o -O2 -s -ffast-math -std=c99
			
 
				+	$(iOS_GCC) -isysroot$(iOS_SYSROOT) -arch armv7 yuv444rgb8888.c -c -o../arm-darwin/yuv444rgb8888_armv7.o -O2 -s -ffast-math -std=c99
			
 
				+	lipo -create ../arm-darwin/yuv444rgb8888_armv6.o ../arm-darwin/yuv444rgb8888_armv7.o -output ../arm-darwin/yuv444rgb8888.o
			
 
				+	rm -f ../arm-darwin/yuv444rgb8888_armv6.o
			
 
				+	rm -f ../arm-darwin/yuv444rgb8888_armv7.o
			
 
				 	gcc yuv2bgr16tab.c -c -o../i386-iphonesim/yuv2bgr16tab.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
			
 
				 	gcc yuv420rgb8888.c -c -o../i386-iphonesim/yuv420rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
			
 
				+	gcc yuv422rgb8888.c -c -o../i386-iphonesim/yuv422rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
			
 
				+	gcc yuv444rgb8888.c -c -o../i386-iphonesim/yuv444rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
			
--- a/lib/theora/yuv2rgba/compile_bcc32.bat
+++ b/lib/theora/yuv2rgba/compile_bcc32.bat
@@ -0,0 +1,4 @@
 
				+%BCC32% -6 -I.\ -O2 -a8 -c -u- -o..\delphi\yuv2bgr16tab.obj yuv2bgr16tab.c

			
 
				+%BCC32% -6 -I.\ -O2 -a8 -c -u- -o..\delphi\yuv420rgb8888.obj yuv420rgb8888.c

			
 
				+%BCC32% -6 -I.\ -O2 -a8 -c -u- -o..\delphi\yuv422rgb8888.obj yuv422rgb8888.c

			
 
				+%BCC32% -6 -I.\ -O2 -a8 -c -u- -o..\delphi\yuv444rgb8888.obj yuv444rgb8888.c
			
--- a/lib/theora/yuv2rgba/yuv422rgb8888.c
+++ b/lib/theora/yuv2rgba/yuv422rgb8888.c
@@ -0,0 +1,182 @@
 
				+/* YUV-> RGB conversion code.
			
 
				+ *
			
 
				+ * Copyright (C) 2011 Robin Watts ([email protected]) for Pinknoise
			
 
				+ * Productions Ltd.
			
 
				+ *
			
 
				+ * Licensed under the BSD license. See 'COPYING' for details of
			
 
				+ * (non-)warranty.
			
 
				+ *
			
 
				+ *
			
 
				+ * The algorithm used here is based heavily on one created by Sophie Wilson
			
 
				+ * of Acorn/e-14/Broadcom. Many thanks.
			
 
				+ *
			
 
				+ * Additional tweaks (in the fast fixup code) are from Paul Gardiner.
			
 
				+ *
			
 
				+ * The old implementation of YUV -> RGB did:
			
 
				+ *
			
 
				+ * R = CLAMP((Y-16)*1.164 +           1.596*V)
			
 
				+ * G = CLAMP((Y-16)*1.164 - 0.391*U - 0.813*V)
			
 
				+ * B = CLAMP((Y-16)*1.164 + 2.018*U          )
			
 
				+ *
			
 
				+ * We're going to bend that here as follows:
			
 
				+ *
			
 
				+ * R = CLAMP(y +           1.596*V)
			
 
				+ * G = CLAMP(y - 0.383*U - 0.813*V)
			
 
				+ * B = CLAMP(y + 1.976*U          )
			
 
				+ *
			
 
				+ * where y = 0               for       Y <=  16,
			
 
				+ *       y = (  Y-16)*1.164, for  16 < Y <= 239,
			
 
				+ *       y = (239-16)*1.164, for 239 < Y
			
 
				+ *
			
 
				+ * i.e. We clamp Y to the 16 to 239 range (which it is supposed to be in
			
 
				+ * anyway). We then pick the B_U factor so that B never exceeds 511. We then
			
 
				+ * shrink the G_U factor in line with that to avoid a colour shift as much as
			
 
				+ * possible.
			
 
				+ *
			
 
				+ * We're going to use tables to do it faster, but rather than doing it using
			
 
				+ * 5 tables as the above suggests, we're going to do it using just 3.
			
 
				+ *
			
 
				+ * We do this by working in parallel within a 32 bit word, and using one
			
 
				+ * table each for Y U and V.
			
 
				+ *
			
 
				+ * Source Y values are    0 to 255, so    0.. 260 after scaling
			
 
				+ * Source U values are -128 to 127, so  -49.. 49(G), -253..251(B) after scaling
			
 
				+ * Source V values are -128 to 127, so -204..203(R), -104..103(G) after scaling
			
 
				+ *
			
 
				+ * So total summed values:
			
 
				+ * -223 <= R <= 481, -173 <= G <= 431, -253 <= B < 511
			
 
				+ *
			
 
				+ * We need to pack R G and B into a 32 bit word, and because of Bs range we
			
 
				+ * need 2 bits above the valid range of B to detect overflow, and another one
			
 
				+ * to detect the sense of the overflow. We therefore adopt the following
			
 
				+ * representation:
			
 
				+ *
			
 
				+ * osGGGGGgggggosBBBBBbbbosRRRRRrrr
			
 
				+ *
			
 
				+ * Each such word breaks down into 3 ranges.
			
 
				+ *
			
 
				+ * osGGGGGggggg   osBBBBBbbb   osRRRRRrrr
			
 
				+ *
			
 
				+ * Thus we have 8 bits for each B and R table entry, and 10 bits for G (good
			
 
				+ * as G is the most noticeable one). The s bit for each represents the sign,
			
 
				+ * and o represents the overflow.
			
 
				+ *
			
 
				+ * For R and B we pack the table by taking the 11 bit representation of their
			
 
				+ * values, and toggling bit 10 in the U and V tables.
			
 
				+ *
			
 
				+ * For the green case we calculate 4*G (thus effectively using 10 bits for the
			
 
				+ * valid range) truncate to 12 bits. We toggle bit 11 in the Y table.
			
 
				+ */
			
 
				+
			
 
				+#include "yuv2rgb.h"
			
 
				+
			
 
				+/* Overflow flag bits of the packed G (bit 30), B (bit 19) and R (bit 8)
				+ * fields of a table-summed pixel word. */
				+enum
				+{
				+    FLAGS         = 0x40080100
				+};
				+
				+/* Sum of the U and V table entries. The U table starts at index 256 and
				+ * the V table at index 512 of 'tables'. */
				+#define READUV(U,V) (tables[256 + (U)] + tables[512 + (V)])
				+
				+/* Y table entry. The Y table starts at index 0 of 'tables'. */
				+#define READY(Y)    tables[Y]
				+
				+/* Clamp the channels of packed pixel Y when any overflow flag is set.
				+ * Y must be a modifiable lvalue.
				+ *
				+ * Step 1: flag - (flag>>8) is a mask of the 8 bits directly below each set
				+ *         flag, so the OR forces every flagged channel to 0xFF.
				+ * Step 2: 1 is added at the lowest bit of every channel whose bit above
				+ *         the flag is clear; for a channel saturated in step 1 this wraps
				+ *         it round to 0 (the underflow case).
				+ * NOTE(review): step 2 leaves in-range channels alone only if the tables
				+ * keep the bit above the flag set for in-range values - confirm against
				+ * the table generator. */
				+#define FIXUP(Y)                   \
				+do {                               \
				+    int tmp = (Y) & FLAGS;         \
				+    if (tmp != 0)                  \
				+    {                              \
				+        tmp  -= tmp>>8;            \
				+        (Y)  |= tmp;               \
				+        tmp   = FLAGS & ~((Y)>>1); \
				+        (Y)  += tmp>>8;            \
				+    }                              \
				+} while (0 == 1)
				+
				+/* Write packed pixel Y as four bytes at DSTPTR and advance DSTPTR by 4.
				+ * The bytes are bits 0-7, bits 22-29 and bits 11-18 of Y (the R, G and B
				+ * fields of the packing described above) followed by a constant 255. */
				+#define STORE(Y,DSTPTR)         \
				+do {                            \
				+    *(DSTPTR)++ = (Y);          \
				+    *(DSTPTR)++ = (Y)>>22;      \
				+    *(DSTPTR)++ = (Y)>>11;      \
				+    *(DSTPTR)++ = 255;          \
				+} while (0 == 1)
				+
				+/* Convert a planar YUV 4:2:2 picture to 4-byte-per-pixel output.
				+ *
				+ * dst_ptr   first output pixel; four bytes are written per pixel (STORE)
				+ * y_ptr     luma plane, one sample per pixel
				+ * u_ptr     U plane, one sample per horizontal pixel pair on every row
				+ * v_ptr     V plane, same layout as the U plane
				+ * width     pixels per row
				+ * height    number of rows; all 'height' rows are converted
				+ * y_span    distance in bytes between the starts of two luma rows
				+ * uv_span   distance in bytes between the starts of two chroma rows
				+ * dst_span  distance in bytes between the starts of two output rows;
				+ *           may be negative to fill the destination bottom-up
				+ * tables    lookup tables indexed as [Y], [256 + U] and [512 + V], so at
				+ *           least 768 entries are read from
				+ * dither    not used by this implementation
				+ *
				+ * Nothing is written when width or height is not positive.
				+ */
				+void yuv422_2_rgb8888(uint8_t  *dst_ptr,
				+                const uint8_t  *y_ptr,
				+                const uint8_t  *u_ptr,
				+                const uint8_t  *v_ptr,
				+                      int32_t   width,
				+                      int32_t   height,
				+                      int32_t   y_span,
				+                      int32_t   uv_span,
				+                      int32_t   dst_span,
				+                const uint32_t *tables,
				+                      int32_t   dither)
				+{
				+    int32_t pairs;
				+    int32_t row;
				+    int32_t col;
				+
				+    (void)dither;
				+
				+    if (width <= 0 || height <= 0)
				+        return;
				+
				+    /* Number of complete two-pixel groups sharing one chroma sample. */
				+    pairs = width >> 1;
				+
				+    for (row = 0; row < height; row++)
				+    {
				+        for (col = 0; col < pairs; col++)
				+        {
				+            /* Two neighbouring pixels use the same U/V sample */
				+            uint32_t uv, y0, y1;
				+
				+            uv  = READUV(*u_ptr++,*v_ptr++);
				+            y0  = uv + READY(*y_ptr++);
				+            y1  = uv + READY(*y_ptr++);
				+            FIXUP(y0);
				+            FIXUP(y1);
				+            STORE(y0, dst_ptr);
				+            STORE(y1, dst_ptr);
				+        }
				+        if (width & 1)
				+        {
				+            /* Odd width: last pixel of the row. The chroma pointers are
				+             * deliberately not advanced here; the row step below only
				+             * accounts for the 'pairs' samples consumed by the loop. */
				+            uint32_t uv, y0;
				+
				+            uv = READUV(*u_ptr,*v_ptr);
				+            y0 = uv + READY(*y_ptr++);
				+            FIXUP(y0);
				+            STORE(y0, dst_ptr);
				+        }
				+        /* Step every pointer from the end of this row to the next row */
				+        dst_ptr += dst_span-width*4;
				+        y_ptr   += y_span-width;
				+        u_ptr   += uv_span-pairs;
				+        v_ptr   += uv_span-pairs;
				+    }
				+}
			
--- a/lib/theora/yuv2rgba/yuv444rgb8888.c
+++ b/lib/theora/yuv2rgba/yuv444rgb8888.c
@@ -0,0 +1,184 @@
 
				+/* YUV-> RGB conversion code.
			
 
				+ *
			
 
				+ * Copyright (C) 2011 Robin Watts ([email protected]) for Pinknoise
			
 
				+ * Productions Ltd.
			
 
				+ *
			
 
				+ * Licensed under the BSD license. See 'COPYING' for details of
			
 
				+ * (non-)warranty.
			
 
				+ *
			
 
				+ *
			
 
				+ * The algorithm used here is based heavily on one created by Sophie Wilson
			
 
				+ * of Acorn/e-14/Broadcom. Many thanks.
			
 
				+ *
			
 
				+ * Additional tweaks (in the fast fixup code) are from Paul Gardiner.
			
 
				+ *
			
 
				+ * The old implementation of YUV -> RGB did:
			
 
				+ *
			
 
				+ * R = CLAMP((Y-16)*1.164 +           1.596*V)
			
 
				+ * G = CLAMP((Y-16)*1.164 - 0.391*U - 0.813*V)
			
 
				+ * B = CLAMP((Y-16)*1.164 + 2.018*U          )
			
 
				+ *
			
 
				+ * We're going to bend that here as follows:
			
 
				+ *
			
 
				+ * R = CLAMP(y +           1.596*V)
			
 
				+ * G = CLAMP(y - 0.383*U - 0.813*V)
			
 
				+ * B = CLAMP(y + 1.976*U          )
			
 
				+ *
			
 
				+ * where y = 0               for       Y <=  16,
			
 
				+ *       y = (  Y-16)*1.164, for  16 < Y <= 239,
			
 
				+ *       y = (239-16)*1.164, for 239 < Y
			
 
				+ *
			
 
				+ * i.e. We clamp Y to the 16 to 239 range (which it is supposed to be in
			
 
				+ * anyway). We then pick the B_U factor so that B never exceeds 511. We then
			
 
				+ * shrink the G_U factor in line with that to avoid a colour shift as much as
			
 
				+ * possible.
			
 
				+ *
			
 
				+ * We're going to use tables to do it faster, but rather than doing it using
			
 
				+ * 5 tables as the above suggests, we're going to do it using just 3.
			
 
				+ *
			
 
				+ * We do this by working in parallel within a 32 bit word, and using one
			
 
				+ * table each for Y U and V.
			
 
				+ *
			
 
				+ * Source Y values are    0 to 255, so    0.. 260 after scaling
			
 
				+ * Source U values are -128 to 127, so  -49.. 49(G), -253..251(B) after scaling
			
 
				+ * Source V values are -128 to 127, so -204..203(R), -104..103(G) after scaling
			
 
				+ *
			
 
				+ * So total summed values:
			
 
				+ * -223 <= R <= 481, -173 <= G <= 431, -253 <= B < 511
			
 
				+ *
			
 
				+ * We need to pack R G and B into a 32 bit word, and because of Bs range we
			
 
				+ * need 2 bits above the valid range of B to detect overflow, and another one
			
 
				+ * to detect the sense of the overflow. We therefore adopt the following
			
 
				+ * representation:
			
 
				+ *
			
 
				+ * osGGGGGgggggosBBBBBbbbosRRRRRrrr
			
 
				+ *
			
 
				+ * Each such word breaks down into 3 ranges.
			
 
				+ *
			
 
				+ * osGGGGGggggg   osBBBBBbbb   osRRRRRrrr
			
 
				+ *
			
 
				+ * Thus we have 8 bits for each B and R table entry, and 10 bits for G (good
			
 
				+ * as G is the most noticeable one). The s bit for each represents the sign,
			
 
				+ * and o represents the overflow.
			
 
				+ *
			
 
				+ * For R and B we pack the table by taking the 11 bit representation of their
			
 
				+ * values, and toggling bit 10 in the U and V tables.
			
 
				+ *
			
 
				+ * For the green case we calculate 4*G (thus effectively using 10 bits for the
			
 
				+ * valid range) truncate to 12 bits. We toggle bit 11 in the Y table.
			
 
				+ */
			
 
				+
			
 
				+#include "yuv2rgb.h"
			
 
				+
			
 
				+/* Overflow flag bits of the packed G (bit 30), B (bit 19) and R (bit 8)
				+ * fields of a table-summed pixel word. */
				+enum
				+{
				+    FLAGS         = 0x40080100
				+};
				+
				+/* Sum of the U and V table entries. The U table starts at index 256 and
				+ * the V table at index 512 of 'tables'. */
				+#define READUV(U,V) (tables[256 + (U)] + tables[512 + (V)])
				+
				+/* Y table entry. The Y table starts at index 0 of 'tables'. */
				+#define READY(Y)    tables[Y]
				+
				+/* Clamp the channels of packed pixel Y when any overflow flag is set.
				+ * Y must be a modifiable lvalue.
				+ *
				+ * Step 1: flag - (flag>>8) is a mask of the 8 bits directly below each set
				+ *         flag, so the OR forces every flagged channel to 0xFF.
				+ * Step 2: 1 is added at the lowest bit of every channel whose bit above
				+ *         the flag is clear; for a channel saturated in step 1 this wraps
				+ *         it round to 0 (the underflow case).
				+ * NOTE(review): step 2 leaves in-range channels alone only if the tables
				+ * keep the bit above the flag set for in-range values - confirm against
				+ * the table generator. */
				+#define FIXUP(Y)                   \
				+do {                               \
				+    int tmp = (Y) & FLAGS;         \
				+    if (tmp != 0)                  \
				+    {                              \
				+        tmp  -= tmp>>8;            \
				+        (Y)  |= tmp;               \
				+        tmp   = FLAGS & ~((Y)>>1); \
				+        (Y)  += tmp>>8;            \
				+    }                              \
				+} while (0 == 1)
				+
				+/* Write packed pixel Y as four bytes at DSTPTR and advance DSTPTR by 4.
				+ * The bytes are bits 0-7, bits 22-29 and bits 11-18 of Y (the R, G and B
				+ * fields of the packing described above) followed by a constant 255. */
				+#define STORE(Y,DSTPTR)         \
				+do {                            \
				+    *(DSTPTR)++ = (Y);          \
				+    *(DSTPTR)++ = (Y)>>22;      \
				+    *(DSTPTR)++ = (Y)>>11;      \
				+    *(DSTPTR)++ = 255;          \
				+} while (0 == 1)
				+
				+/* Convert a planar YUV 4:4:4 picture to 4-byte-per-pixel output.
				+ *
				+ * dst_ptr   first output pixel; four bytes are written per pixel (STORE)
				+ * y_ptr     luma plane, one sample per pixel
				+ * u_ptr     U plane, one sample per pixel
				+ * v_ptr     V plane, one sample per pixel
				+ * width     pixels per row
				+ * height    number of rows; all 'height' rows are converted
				+ * y_span    distance in bytes between the starts of two luma rows
				+ * uv_span   distance in bytes between the starts of two chroma rows
				+ * dst_span  distance in bytes between the starts of two output rows;
				+ *           may be negative to fill the destination bottom-up
				+ * tables    lookup tables indexed as [Y], [256 + U] and [512 + V], so at
				+ *           least 768 entries are read from
				+ * dither    not used by this implementation
				+ *
				+ * Nothing is written when width or height is not positive.
				+ */
				+void yuv444_2_rgb8888(uint8_t  *dst_ptr,
				+                const uint8_t  *y_ptr,
				+                const uint8_t  *u_ptr,
				+                const uint8_t  *v_ptr,
				+                      int32_t   width,
				+                      int32_t   height,
				+                      int32_t   y_span,
				+                      int32_t   uv_span,
				+                      int32_t   dst_span,
				+                const uint32_t *tables,
				+                      int32_t   dither)
				+{
				+    int32_t row;
				+    int32_t col;
				+
				+    (void)dither;
				+
				+    if (width <= 0 || height <= 0)
				+        return;
				+
				+    for (row = 0; row < height; row++)
				+    {
				+        for (col = 0; col < width; col++)
				+        {
				+            /* Every pixel has its own U/V sample */
				+            uint32_t rgb;
				+
				+            rgb  = READUV(*u_ptr++,*v_ptr++);
				+            rgb += READY(*y_ptr++);
				+            FIXUP(rgb);
				+            STORE(rgb, dst_ptr);
				+        }
				+        /* Step every pointer from the end of this row to the next row */
				+        dst_ptr += dst_span-width*4;
				+        y_ptr   += y_span-width;
				+        u_ptr   += uv_span-width;
				+        v_ptr   += uv_span-width;
				+    }
				+}
			
--- a/src/zgl_main.pas
+++ b/src/zgl_main.pas
@@ -48,8 +48,8 @@ uses
 
				   zgl_types;
			
 
				 
			
 
				 const
			
 
				-  cs_ZenGL    = 'ZenGL 0.3.6';
			
 
				-  cs_Date     = '2012.12.16';
			
 
				+  cs_ZenGL    = 'ZenGL 0.3.6 [ End of the World ]';
			
 
				+  cs_Date     = '2012.12.21';
			
 
				   cv_major    = 0;
			
 
				   cv_minor    = 3;
			
 
				   cv_revision = 6;
			
--- a/src/zgl_video_theora.pas
+++ b/src/zgl_video_theora.pas
@@ -27,6 +27,8 @@ unit zgl_video_theora;
 
				 
			
 
				 {$L yuv2bgr16tab}
			
 
				 {$L yuv420rgb8888}
			
 
				+{$L yuv422rgb8888}
			
 
				+{$L yuv444rgb8888}
			
 
				 
			
 
				 interface
			
 
				 
			
@@ -56,8 +58,10 @@ type
 
				     Time        : Double;
			
 
				   end;
			
 
				 
			
 
				+function  get_yuv2bgr565_table : pcuint32; cdecl; external;
			
 
				 procedure yuv420_2_rgb8888( dst_ptr, y_ptr, u_ptr, v_ptr : pcuchar; width, height, y_span, uv_span, dst_span : cint32; tables : pcuint32; dither : cint32 ); cdecl; external;
			
 
				-function get_yuv2bgr565_table : pcuint32; cdecl; external;
			
 
				+procedure yuv422_2_rgb8888( dst_ptr, y_ptr, u_ptr, v_ptr : pcuchar; width, height, y_span, uv_span, dst_span : cint32; tables : pcuint32; dither : cint32 ); cdecl; external;
			
 
				+procedure yuv444_2_rgb8888( dst_ptr, y_ptr, u_ptr, v_ptr : pcuchar; width, height, y_span, uv_span, dst_span : cint32; tables : pcuint32; dither : cint32 ); cdecl; external;
			
 
				 
			
 
				 var
			
 
				   theoraDecoderOGV : zglTVideoDecoder;
			
@@ -176,8 +180,8 @@ begin
 
				     begin
			
 
				       TheoraData.DecoderCtx := th_decode_alloc( @TheoraData.TheoraInfo, setupInfo );
			
 
				 
			
 
				-      Width     := TheoraData.TheoraInfo.frame_width;
			
 
				-      Height    := TheoraData.TheoraInfo.frame_height;
			
 
				+      Width     := TheoraData.TheoraInfo.pic_width;
			
 
				+      Height    := TheoraData.TheoraInfo.pic_height;
			
 
				       FrameRate := TheoraData.TheoraInfo.fps_numerator / TheoraData.TheoraInfo.fps_denominator;
			
 
				       Duration  := 0;
			
 
				       Frames    := 0;
			
@@ -219,26 +223,9 @@ begin
 
				     end else
			
 
				       Result := FALSE;
			
 
				 
			
 
				-  if TheoraData.TheoraInfo.pixel_fmt <> TH_PF_420 Then
			
 
				-    begin
			
 
				-      log_Add( 'Theora: Pixel format is not supported(YUV 4:2:0 is needed)' );
			
 
				-      Result := FALSE;
			
 
				-    end;
			
 
				-
			
 
				   th_comment_clear( @comment );
			
 
				 end;
			
 
				 
			
 
				-function clamp( value : Integer ) : Integer; {$IFDEF USE_INLINE} inline; {$ENDIF}
			
 
				-begin
			
 
				-  if value > 2088960 Then
			
 
				-    Result := 2088960
			
 
				-  else
			
 
				-    if value < 0 Then
			
 
				-      Result := 0
			
 
				-    else
			
 
				-      Result := value;
			
 
				-end;
			
 
				-
			
 
				 function theora_Update( var TheoraData : zglTTheoraData; Time : Double; Data : PByteArray ) : Integer;
			
 
				   var
			
 
				     ycbcr      : th_ycbcr_buffer;
			
@@ -280,12 +267,25 @@ begin
 
				         th_decode_ycbcr_out( TheoraData.DecoderCtx, @ycbcr );
			
 
				 
			
 
				         dataOrig := Data;
			
 
				-        INC( PByte( Data ), ( ycbcr[ 0 ].height - 1 ) * ycbcr[ 0 ].width * 4 );
			
 
				-
			
 
				-        yuv420_2_rgb8888( pcuchar( Data ), ycbcr[ 0 ].data, ycbcr[ 1 ].data, ycbcr[ 2 ].data,
			
 
				-                          ycbcr[ 0 ].width, ycbcr[ 0 ].height,
			
 
				-                          ycbcr[ 0 ].stride, ycbcr[ 1 ].stride, -ycbcr[ 0 ].width * 4,
			
 
				-                          get_yuv2bgr565_table(), 0 );
			
 
				+        INC( PByte( Data ), ( TheoraData.TheoraInfo.pic_height - 1 ) * TheoraData.TheoraInfo.pic_width * 4 );
			
 
				+
			
 
				+        case TheoraData.TheoraInfo.pixel_fmt of
			
 
				+          TH_PF_420:
			
 
				+            yuv420_2_rgb8888( pcuchar( Data ), ycbcr[ 0 ].data, ycbcr[ 1 ].data, ycbcr[ 2 ].data,
			
 
				+                              TheoraData.TheoraInfo.pic_width, TheoraData.TheoraInfo.pic_height,
			
 
				+                              ycbcr[ 0 ].stride, ycbcr[ 1 ].stride, -TheoraData.TheoraInfo.pic_width * 4,
			
 
				+                              get_yuv2bgr565_table(), 0 );
			
 
				+          TH_PF_422:
			
 
				+            yuv422_2_rgb8888( pcuchar( Data ), ycbcr[ 0 ].data, ycbcr[ 1 ].data, ycbcr[ 2 ].data,
			
 
				+                              TheoraData.TheoraInfo.pic_width, TheoraData.TheoraInfo.pic_height,
			
 
				+                              ycbcr[ 0 ].stride, ycbcr[ 1 ].stride, -TheoraData.TheoraInfo.pic_width * 4,
			
 
				+                              get_yuv2bgr565_table(), 0 );
			
 
				+          TH_PF_444:
			
 
				+            yuv444_2_rgb8888( pcuchar( Data ), ycbcr[ 0 ].data, ycbcr[ 1 ].data, ycbcr[ 2 ].data,
			
 
				+                              TheoraData.TheoraInfo.pic_width, TheoraData.TheoraInfo.pic_height,
			
 
				+                              ycbcr[ 0 ].stride, ycbcr[ 1 ].stride, -TheoraData.TheoraInfo.pic_width * 4,
			
 
				+                              get_yuv2bgr565_table(), 0 );
			
 
				+        end;
			
 
				 
			
 
				         {$IFDEF ANDROID}
			
 
				         INC( PByte( dataOrig ), 3 );