Browse Source

- support for colorspaces 4:2:2 and 4:4:4 of theora video

git-svn-id: http://zengl.googlecode.com/svn/branches/0.3.x@1950 6573c10b-8653-0410-9706-d32479e959fb
dr.andru 12 years ago
parent
commit
6cbd75f5fb

+ 1 - 1
headers/zglHeader.h

@@ -3,7 +3,7 @@
 /*--------------------------------*/
 /*--------------------------------*/
 /*                                */
 /*                                */
 /* version:  0.3.6                */
 /* version:  0.3.6                */
-/* date:     2012.12.16           */
+/* date:     2012.12.21           */
 /* license:  zlib                 */
 /* license:  zlib                 */
 /* homepage: http://zengl.org     */
 /* homepage: http://zengl.org     */
 /*                                */
 /*                                */

+ 1 - 1
headers/zglHeader.pas

@@ -3,7 +3,7 @@
 {--------------------------------}
 {--------------------------------}
 {                                }
 {                                }
 { version:  0.3.6                }
 { version:  0.3.6                }
-{ date:     2012.12.16           }
+{ date:     2012.12.21           }
 { license:  zlib                 }
 { license:  zlib                 }
 { homepage: http://zengl.org     }
 { homepage: http://zengl.org     }
 {                                }
 {                                }

BIN
lib/theora/delphi/yuv422rgb8888.obj


BIN
lib/theora/delphi/yuv444rgb8888.obj


+ 28 - 0
lib/theora/yuv2rgba/Makefile

@@ -5,30 +5,46 @@ ANDROID_GCC=$(ANDROID_NDK)/toolchains/arm-linux-androideabi-4.4.3/prebuilt/linux
 linux:
 linux:
 	gcc yuv2bgr16tab.c -c -o../i386-linux/yuv2bgr16tab.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
 	gcc yuv2bgr16tab.c -c -o../i386-linux/yuv2bgr16tab.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
 	gcc yuv420rgb8888.c -c -o../i386-linux/yuv420rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
 	gcc yuv420rgb8888.c -c -o../i386-linux/yuv420rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
+	gcc yuv422rgb8888.c -c -o../i386-linux/yuv422rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
+	gcc yuv444rgb8888.c -c -o../i386-linux/yuv444rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
 	gcc yuv2bgr16tab.c -c -o../x86_64-linux/yuv2bgr16tab.o -O2 -s -ffast-math -std=c99 -m64 -fPIC
 	gcc yuv2bgr16tab.c -c -o../x86_64-linux/yuv2bgr16tab.o -O2 -s -ffast-math -std=c99 -m64 -fPIC
 	gcc yuv420rgb8888.c -c -o../x86_64-linux/yuv420rgb8888.o -O2 -s -ffast-math -std=c99 -m64 -fPIC
 	gcc yuv420rgb8888.c -c -o../x86_64-linux/yuv420rgb8888.o -O2 -s -ffast-math -std=c99 -m64 -fPIC
+	gcc yuv422rgb8888.c -c -o../x86_64-linux/yuv422rgb8888.o -O2 -s -ffast-math -std=c99 -m64 -fPIC
+	gcc yuv444rgb8888.c -c -o../x86_64-linux/yuv444rgb8888.o -O2 -s -ffast-math -std=c99 -m64 -fPIC
 
 
 android:
 android:
 	$(ANDROID_GCC) yuv2bgr16tab.c -c -o../arm-linux/yuv2bgr16tab.o -O2 -s -marm -march=armv5 -Xassembler "-meabi=5" -ffast-math -std=c99 -I$(ANDROID_NDK)/platforms/android-3/arch-arm/usr/include
 	$(ANDROID_GCC) yuv2bgr16tab.c -c -o../arm-linux/yuv2bgr16tab.o -O2 -s -marm -march=armv5 -Xassembler "-meabi=5" -ffast-math -std=c99 -I$(ANDROID_NDK)/platforms/android-3/arch-arm/usr/include
 	$(ANDROID_GCC) yuv420rgb8888.s -c -o../arm-linux/yuv420rgb8888.o -O2 -s -marm -march=armv5 -Xassembler "-meabi=5" -ffast-math -std=c99 -I$(ANDROID_NDK)/platforms/android-3/arch-arm/usr/include
 	$(ANDROID_GCC) yuv420rgb8888.s -c -o../arm-linux/yuv420rgb8888.o -O2 -s -marm -march=armv5 -Xassembler "-meabi=5" -ffast-math -std=c99 -I$(ANDROID_NDK)/platforms/android-3/arch-arm/usr/include
+	$(ANDROID_GCC) yuv422rgb8888.s -c -o../arm-linux/yuv422rgb8888.o -O2 -s -marm -march=armv5 -Xassembler "-meabi=5" -ffast-math -std=c99 -I$(ANDROID_NDK)/platforms/android-3/arch-arm/usr/include
+	$(ANDROID_GCC) yuv444rgb8888.s -c -o../arm-linux/yuv444rgb8888.o -O2 -s -marm -march=armv5 -Xassembler "-meabi=5" -ffast-math -std=c99 -I$(ANDROID_NDK)/platforms/android-3/arch-arm/usr/include
 
 
 win32:
 win32:
 	i486-mingw32-gcc yuv2bgr16tab.c -c -o../i386-win32/yuv2bgr16tab.o -O2 -s -ffast-math -std=c99 -march=i686
 	i486-mingw32-gcc yuv2bgr16tab.c -c -o../i386-win32/yuv2bgr16tab.o -O2 -s -ffast-math -std=c99 -march=i686
 	i486-mingw32-gcc yuv420rgb8888.c -c -o../i386-win32/yuv420rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686
 	i486-mingw32-gcc yuv420rgb8888.c -c -o../i386-win32/yuv420rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686
+	i486-mingw32-gcc yuv422rgb8888.c -c -o../i386-win32/yuv422rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686
+	i486-mingw32-gcc yuv444rgb8888.c -c -o../i386-win32/yuv444rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686
 
 
 win64:
 win64:
 	x86_64-w64-mingw32-gcc yuv2bgr16tab.c -c -o../x86_64-win64/yuv2bgr16tab.o -O2 -s -ffast-math -std=c99
 	x86_64-w64-mingw32-gcc yuv2bgr16tab.c -c -o../x86_64-win64/yuv2bgr16tab.o -O2 -s -ffast-math -std=c99
 	x86_64-w64-mingw32-gcc yuv420rgb8888.c -c -o../x86_64-win64/yuv420rgb8888.o -O2 -s -ffast-math -std=c99
 	x86_64-w64-mingw32-gcc yuv420rgb8888.c -c -o../x86_64-win64/yuv420rgb8888.o -O2 -s -ffast-math -std=c99
+	x86_64-w64-mingw32-gcc yuv422rgb8888.c -c -o../x86_64-win64/yuv422rgb8888.o -O2 -s -ffast-math -std=c99
+	x86_64-w64-mingw32-gcc yuv444rgb8888.c -c -o../x86_64-win64/yuv444rgb8888.o -O2 -s -ffast-math -std=c99
 
 
 wince:
 wince:
 	arm-wince-cegcc-gcc yuv2bgr16tab.c -c -o../arm-wince/yuv2bgr16tab.o -I./ -O2 -s -ffast-math
 	arm-wince-cegcc-gcc yuv2bgr16tab.c -c -o../arm-wince/yuv2bgr16tab.o -I./ -O2 -s -ffast-math
 	arm-wince-cegcc-gcc yuv420rgb8888.c -c -o../arm-wince/yuv420rgb8888.o -I./ -O2 -s -ffast-math
 	arm-wince-cegcc-gcc yuv420rgb8888.c -c -o../arm-wince/yuv420rgb8888.o -I./ -O2 -s -ffast-math
+	arm-wince-cegcc-gcc yuv422rgb8888.c -c -o../arm-wince/yuv422rgb8888.o -I./ -O2 -s -ffast-math
+	arm-wince-cegcc-gcc yuv444rgb8888.c -c -o../arm-wince/yuv444rgb8888.o -I./ -O2 -s -ffast-math
 
 
 macos:
 macos:
 	gcc yuv2bgr16tab.c -c -o../i386-darwin/yuv2bgr16tab.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
 	gcc yuv2bgr16tab.c -c -o../i386-darwin/yuv2bgr16tab.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
 	gcc yuv420rgb8888.c -c -o../i386-darwin/yuv420rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
 	gcc yuv420rgb8888.c -c -o../i386-darwin/yuv420rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
+	gcc yuv422rgb8888.c -c -o../i386-darwin/yuv422rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
+	gcc yuv444rgb8888.c -c -o../i386-darwin/yuv444rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
 	powerpc-apple-darwin10-gcc-4.2.1 yuv2bgr16tab.c -c -o../powerpc-darwin/yuv2bgr16tab.o -O2 -s -ffast-math -std=c99
 	powerpc-apple-darwin10-gcc-4.2.1 yuv2bgr16tab.c -c -o../powerpc-darwin/yuv2bgr16tab.o -O2 -s -ffast-math -std=c99
 	powerpc-apple-darwin10-gcc-4.2.1 yuv420rgb8888.c -c -o../powerpc-darwin/yuv420rgb8888.o -O2 -s -ffast-math -std=c99
 	powerpc-apple-darwin10-gcc-4.2.1 yuv420rgb8888.c -c -o../powerpc-darwin/yuv420rgb8888.o -O2 -s -ffast-math -std=c99
+	powerpc-apple-darwin10-gcc-4.2.1 yuv422rgb8888.c -c -o../powerpc-darwin/yuv422rgb8888.o -O2 -s -ffast-math -std=c99
+	powerpc-apple-darwin10-gcc-4.2.1 yuv444rgb8888.c -c -o../powerpc-darwin/yuv444rgb8888.o -O2 -s -ffast-math -std=c99
 
 
 ios:
 ios:
 	$(iOS_GCC) -isysroot$(iOS_SYSROOT) -arch armv6 yuv2bgr16tab.c -c -o../arm-darwin/yuv2bgr16tab_armv6.o -O2 -s -ffast-math -std=c99
 	$(iOS_GCC) -isysroot$(iOS_SYSROOT) -arch armv6 yuv2bgr16tab.c -c -o../arm-darwin/yuv2bgr16tab_armv6.o -O2 -s -ffast-math -std=c99
@@ -41,5 +57,17 @@ ios:
 	lipo -create ../arm-darwin/yuv420rgb8888_armv6.o ../arm-darwin/yuv420rgb8888_armv7.o -output ../arm-darwin/yuv420rgb8888.o
 	lipo -create ../arm-darwin/yuv420rgb8888_armv6.o ../arm-darwin/yuv420rgb8888_armv7.o -output ../arm-darwin/yuv420rgb8888.o
 	rm -f ../arm-darwin/yuv420rgb8888_armv6.o
 	rm -f ../arm-darwin/yuv420rgb8888_armv6.o
 	rm -f ../arm-darwin/yuv420rgb8888_armv7.o
 	rm -f ../arm-darwin/yuv420rgb8888_armv7.o
+	$(iOS_GCC) -isysroot$(iOS_SYSROOT) -arch armv6 yuv422rgb8888.c -c -o../arm-darwin/yuv422rgb8888_armv6.o -O2 -s -ffast-math -std=c99
+	$(iOS_GCC) -isysroot$(iOS_SYSROOT) -arch armv7 yuv422rgb8888.c -c -o../arm-darwin/yuv422rgb8888_armv7.o -O2 -s -ffast-math -std=c99
+	lipo -create ../arm-darwin/yuv422rgb8888_armv6.o ../arm-darwin/yuv422rgb8888_armv7.o -output ../arm-darwin/yuv422rgb8888.o
+	rm -f ../arm-darwin/yuv422rgb8888_armv6.o
+	rm -f ../arm-darwin/yuv422rgb8888_armv7.o
+	$(iOS_GCC) -isysroot$(iOS_SYSROOT) -arch armv6 yuv444rgb8888.c -c -o../arm-darwin/yuv444rgb8888_armv6.o -O2 -s -ffast-math -std=c99
+	$(iOS_GCC) -isysroot$(iOS_SYSROOT) -arch armv7 yuv444rgb8888.c -c -o../arm-darwin/yuv444rgb8888_armv7.o -O2 -s -ffast-math -std=c99
+	lipo -create ../arm-darwin/yuv444rgb8888_armv6.o ../arm-darwin/yuv444rgb8888_armv7.o -output ../arm-darwin/yuv444rgb8888.o
+	rm -f ../arm-darwin/yuv444rgb8888_armv6.o
+	rm -f ../arm-darwin/yuv444rgb8888_armv7.o
 	gcc yuv2bgr16tab.c -c -o../i386-iphonesim/yuv2bgr16tab.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
 	gcc yuv2bgr16tab.c -c -o../i386-iphonesim/yuv2bgr16tab.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
 	gcc yuv420rgb8888.c -c -o../i386-iphonesim/yuv420rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
 	gcc yuv420rgb8888.c -c -o../i386-iphonesim/yuv420rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
+	gcc yuv422rgb8888.c -c -o../i386-iphonesim/yuv422rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686 -m32
+	gcc yuv444rgb8888.c -c -o../i386-iphonesim/yuv444rgb8888.o -O2 -s -ffast-math -std=c99 -march=i686 -m32

+ 4 - 0
lib/theora/yuv2rgba/compile_bcc32.bat

@@ -0,0 +1,4 @@
+rem Compile every YUV->RGB conversion source into ..\delphi\<name>.obj.
+rem The compiler command is taken from the BCC32 environment variable.
+for %%f in (yuv2bgr16tab yuv420rgb8888 yuv422rgb8888 yuv444rgb8888) do %BCC32% -6 -I.\ -O2 -a8 -c -u- -o..\delphi\%%f.obj %%f.c

+ 182 - 0
lib/theora/yuv2rgba/yuv422rgb8888.c

@@ -0,0 +1,182 @@
+/* YUV-> RGB conversion code.
+ *
+ * Copyright (C) 2011 Robin Watts ([email protected]) for Pinknoise
+ * Productions Ltd.
+ *
+ * Licensed under the BSD license. See 'COPYING' for details of
+ * (non-)warranty.
+ *
+ *
+ * The algorithm used here is based heavily on one created by Sophie Wilson
+ * of Acorn/e-14/Broadcom. Many thanks.
+ *
+ * Additional tweaks (in the fast fixup code) are from Paul Gardiner.
+ *
+ * The old implementation of YUV -> RGB did:
+ *
+ * R = CLAMP((Y-16)*1.164 +           1.596*V)
+ * G = CLAMP((Y-16)*1.164 - 0.391*U - 0.813*V)
+ * B = CLAMP((Y-16)*1.164 + 2.018*U          )
+ *
+ * We're going to bend that here as follows:
+ *
+ * R = CLAMP(y +           1.596*V)
+ * G = CLAMP(y - 0.383*U - 0.813*V)
+ * B = CLAMP(y + 1.976*U          )
+ *
+ * where y = 0               for       Y <=  16,
+ *       y = (  Y-16)*1.164, for  16 < Y <= 239,
+ *       y = (239-16)*1.164, for 239 < Y
+ *
+ * i.e. We clamp Y to the 16 to 239 range (which it is supposed to be in
+ * anyway). We then pick the B_U factor so that B never exceeds 511. We then
+ * shrink the G_U factor in line with that to avoid a colour shift as much as
+ * possible.
+ *
+ * We're going to use tables to do it faster, but rather than doing it using
+ * 5 tables as the above suggests, we're going to do it using just 3.
+ *
+ * We do this by working in parallel within a 32 bit word, and using one
+ * table each for Y U and V.
+ *
+ * Source Y values are    0 to 255, so    0.. 260 after scaling
+ * Source U values are -128 to 127, so  -49.. 49(G), -253..251(B) after
+ * Source V values are -128 to 127, so -204..203(R), -104..103(G) after
+ *
+ * So total summed values:
+ * -223 <= R <= 481, -173 <= G <= 431, -253 <= B < 511
+ *
+ * We need to pack R G and B into a 32 bit word, and because of Bs range we
+ * need 2 bits above the valid range of B to detect overflow, and another one
+ * to detect the sense of the overflow. We therefore adopt the following
+ * representation:
+ *
+ * osGGGGGgggggosBBBBBbbbosRRRRRrrr
+ *
+ * Each such word breaks down into 3 ranges.
+ *
+ * osGGGGGggggg   osBBBBBbbb   osRRRRRrrr
+ *
+ * Thus we have 8 bits for each B and R table entry, and 10 bits for G (good
+ * as G is the most noticeable one). The s bit for each represents the sign,
+ * and o represents the overflow.
+ *
+ * For R and B we pack the table by taking the 11 bit representation of their
+ * values, and toggling bit 10 in the U and V tables.
+ *
+ * For the green case we calculate 4*G (thus effectively using 10 bits for the
+ * valid range) truncate to 12 bits. We toggle bit 11 in the Y table.
+ */
+
+#include "yuv2rgb.h"
+
+/* Mask of the three per-channel flag bits tested by FIXUP: bits 8, 19, 30. */
+enum
+{
+    FLAGS         = 0x40080100
+};
+
+/* Sum of the table entries at offsets 256+U and 512+V.  Expects a `tables`
+ * pointer to be in scope where the macro is expanded. */
+#define READUV(U,V) (tables[256 + (U)] + tables[512 + (V)])
+/* Table entry at offset Y (same `tables` pointer as READUV). */
+#define READY(Y)    tables[Y]
+/* Fix up a packed pixel word in place; Y must be a modifiable lvalue.
+ * Nothing is changed unless at least one FLAGS bit is set in Y.  Otherwise:
+ *   1. for every flag bit that is set, the 8 bits directly below it are
+ *      forced to ones;
+ *   2. for every flag position whose next-higher bit in Y is clear, 1 is
+ *      added at the lowest of those 8 bits.
+ * NOTE(review): presumably the bit above each flag is the sign bit mentioned
+ * in the file header, so step 2 wraps a saturated channel back to zero --
+ * confirm against the code that generates the tables. */
+#define FIXUP(Y)                   \
+do {                               \
+    int tmp = (Y) & FLAGS;         \
+    if (tmp != 0)                  \
+    {                              \
+        tmp  -= tmp>>8;            \
+        (Y)  |= tmp;               \
+        /* parameter parenthesised so any lvalue expression is safe */ \
+        tmp   = FLAGS & ~((Y)>>1); \
+        (Y)  += tmp>>8;            \
+    }                              \
+} while (0 == 1)
+
+/* Store four values through DSTPTR, post-incrementing it after each one:
+ * Y, Y>>22, Y>>11 and the constant 255.  DSTPTR must be a modifiable
+ * pointer lvalue; with a uint8_t pointer each store keeps the low 8 bits. */
+#define STORE(Y,DSTPTR)         \
+do {                            \
+    *(DSTPTR)++ = (Y);          \
+    *(DSTPTR)++ = (Y)>>22;      \
+    *(DSTPTR)++ = (Y)>>11;      \
+    *(DSTPTR)++ = 255;          \
+} while (0 == 1)
+
+/*
+ * Convert a planar YUV 4:2:2 picture to 32-bit pixels.
+ *
+ * Each row reads `width` luma samples and one U/V sample per horizontal
+ * pair of pixels (an odd trailing pixel uses the next U/V sample on its
+ * own).  Every pixel is written as four bytes through STORE; the fourth
+ * byte is always 255.
+ *
+ *   dst_ptr   first byte of the first output row
+ *   y_ptr     first sample of the luma plane
+ *   u_ptr     first sample of the U plane
+ *   v_ptr     first sample of the V plane
+ *   width     pixels per row
+ *   height    number of rows
+ *   y_span    distance in bytes between luma rows
+ *   uv_span   distance in bytes between chroma rows (same for U and V)
+ *   dst_span  distance in bytes between output rows; may be negative
+ *   tables    lookup tables indexed by READY / READUV
+ *   dither    not used by this routine
+ *
+ * Nothing is written when width or height is not positive.
+ *
+ * All `height` rows are converted.  The previous loop pre-decremented the
+ * row counter (a leftover from the 4:2:0 routine, which processes rows in
+ * pairs and handles the last one separately) and therefore never converted
+ * the final row.
+ */
+void yuv422_2_rgb8888(uint8_t  *dst_ptr,
+                const uint8_t  *y_ptr,
+                const uint8_t  *u_ptr,
+                const uint8_t  *v_ptr,
+                      int32_t   width,
+                      int32_t   height,
+                      int32_t   y_span,
+                      int32_t   uv_span,
+                      int32_t   dst_span,
+                const uint32_t *tables,
+                      int32_t   dither)
+{
+    int32_t row;
+
+    (void)dither;
+
+    if (width <= 0 || height <= 0)
+        return;
+
+    for (row = 0; row < height; row++)
+    {
+        int32_t pairs = width >> 1;
+
+        while (pairs-- > 0)
+        {
+            /* Two neighbouring pixels share one chroma sample */
+            uint32_t uv, y0, y1;
+
+            uv  = READUV(*u_ptr++,*v_ptr++);
+            y0  = uv + READY(*y_ptr++);
+            y1  = uv + READY(*y_ptr++);
+            FIXUP(y0);
+            FIXUP(y1);
+            STORE(y0, dst_ptr);
+            STORE(y1, dst_ptr);
+        }
+        if (width & 1)
+        {
+            /* Odd width: last pixel of the row; the chroma pointers are
+             * deliberately not advanced, matching the width>>1 step below */
+            uint32_t uv, y0;
+
+            uv = READUV(*u_ptr,*v_ptr);
+            y0 = uv + READY(*y_ptr++);
+            FIXUP(y0);
+            STORE(y0, dst_ptr);
+        }
+        /* Move every pointer from the end of this row to the start of the
+         * next one */
+        dst_ptr += dst_span-width*4;
+        y_ptr   += y_span-width;
+        u_ptr   += uv_span-(width>>1);
+        v_ptr   += uv_span-(width>>1);
+    }
+}

+ 184 - 0
lib/theora/yuv2rgba/yuv444rgb8888.c

@@ -0,0 +1,184 @@
+/* YUV-> RGB conversion code.
+ *
+ * Copyright (C) 2011 Robin Watts ([email protected]) for Pinknoise
+ * Productions Ltd.
+ *
+ * Licensed under the BSD license. See 'COPYING' for details of
+ * (non-)warranty.
+ *
+ *
+ * The algorithm used here is based heavily on one created by Sophie Wilson
+ * of Acorn/e-14/Broadcom. Many thanks.
+ *
+ * Additional tweaks (in the fast fixup code) are from Paul Gardiner.
+ *
+ * The old implementation of YUV -> RGB did:
+ *
+ * R = CLAMP((Y-16)*1.164 +           1.596*V)
+ * G = CLAMP((Y-16)*1.164 - 0.391*U - 0.813*V)
+ * B = CLAMP((Y-16)*1.164 + 2.018*U          )
+ *
+ * We're going to bend that here as follows:
+ *
+ * R = CLAMP(y +           1.596*V)
+ * G = CLAMP(y - 0.383*U - 0.813*V)
+ * B = CLAMP(y + 1.976*U          )
+ *
+ * where y = 0               for       Y <=  16,
+ *       y = (  Y-16)*1.164, for  16 < Y <= 239,
+ *       y = (239-16)*1.164, for 239 < Y
+ *
+ * i.e. We clamp Y to the 16 to 239 range (which it is supposed to be in
+ * anyway). We then pick the B_U factor so that B never exceeds 511. We then
+ * shrink the G_U factor in line with that to avoid a colour shift as much as
+ * possible.
+ *
+ * We're going to use tables to do it faster, but rather than doing it using
+ * 5 tables as the above suggests, we're going to do it using just 3.
+ *
+ * We do this by working in parallel within a 32 bit word, and using one
+ * table each for Y U and V.
+ *
+ * Source Y values are    0 to 255, so    0.. 260 after scaling
+ * Source U values are -128 to 127, so  -49.. 49(G), -253..251(B) after
+ * Source V values are -128 to 127, so -204..203(R), -104..103(G) after
+ *
+ * So total summed values:
+ * -223 <= R <= 481, -173 <= G <= 431, -253 <= B < 511
+ *
+ * We need to pack R G and B into a 32 bit word, and because of Bs range we
+ * need 2 bits above the valid range of B to detect overflow, and another one
+ * to detect the sense of the overflow. We therefore adopt the following
+ * representation:
+ *
+ * osGGGGGgggggosBBBBBbbbosRRRRRrrr
+ *
+ * Each such word breaks down into 3 ranges.
+ *
+ * osGGGGGggggg   osBBBBBbbb   osRRRRRrrr
+ *
+ * Thus we have 8 bits for each B and R table entry, and 10 bits for G (good
+ * as G is the most noticeable one). The s bit for each represents the sign,
+ * and o represents the overflow.
+ *
+ * For R and B we pack the table by taking the 11 bit representation of their
+ * values, and toggling bit 10 in the U and V tables.
+ *
+ * For the green case we calculate 4*G (thus effectively using 10 bits for the
+ * valid range) truncate to 12 bits. We toggle bit 11 in the Y table.
+ */
+
+#include "yuv2rgb.h"
+
+/* Mask of the three per-channel flag bits tested by FIXUP: bits 8, 19, 30. */
+enum
+{
+    FLAGS         = 0x40080100
+};
+
+/* Sum of the table entries at offsets 256+U and 512+V.  Expects a `tables`
+ * pointer to be in scope where the macro is expanded. */
+#define READUV(U,V) (tables[256 + (U)] + tables[512 + (V)])
+/* Table entry at offset Y (same `tables` pointer as READUV). */
+#define READY(Y)    tables[Y]
+/* Fix up a packed pixel word in place; Y must be a modifiable lvalue.
+ * Nothing is changed unless at least one FLAGS bit is set in Y.  Otherwise:
+ *   1. for every flag bit that is set, the 8 bits directly below it are
+ *      forced to ones;
+ *   2. for every flag position whose next-higher bit in Y is clear, 1 is
+ *      added at the lowest of those 8 bits.
+ * NOTE(review): presumably the bit above each flag is the sign bit mentioned
+ * in the file header, so step 2 wraps a saturated channel back to zero --
+ * confirm against the code that generates the tables. */
+#define FIXUP(Y)                   \
+do {                               \
+    int tmp = (Y) & FLAGS;         \
+    if (tmp != 0)                  \
+    {                              \
+        tmp  -= tmp>>8;            \
+        (Y)  |= tmp;               \
+        /* parameter parenthesised so any lvalue expression is safe */ \
+        tmp   = FLAGS & ~((Y)>>1); \
+        (Y)  += tmp>>8;            \
+    }                              \
+} while (0 == 1)
+
+/* Store four values through DSTPTR, post-incrementing it after each one:
+ * Y, Y>>22, Y>>11 and the constant 255.  DSTPTR must be a modifiable
+ * pointer lvalue; with a uint8_t pointer each store keeps the low 8 bits. */
+#define STORE(Y,DSTPTR)         \
+do {                            \
+    *(DSTPTR)++ = (Y);          \
+    *(DSTPTR)++ = (Y)>>22;      \
+    *(DSTPTR)++ = (Y)>>11;      \
+    *(DSTPTR)++ = 255;          \
+} while (0 == 1)
+
+/*
+ * Convert a planar YUV 4:4:4 picture to 32-bit pixels.
+ *
+ * Each row reads `width` luma samples and `width` U and V samples (one
+ * chroma sample per pixel).  Every pixel is written as four bytes through
+ * STORE; the fourth byte is always 255.
+ *
+ *   dst_ptr   first byte of the first output row
+ *   y_ptr     first sample of the luma plane
+ *   u_ptr     first sample of the U plane
+ *   v_ptr     first sample of the V plane
+ *   width     pixels per row
+ *   height    number of rows
+ *   y_span    distance in bytes between luma rows
+ *   uv_span   distance in bytes between chroma rows (same for U and V)
+ *   dst_span  distance in bytes between output rows; may be negative
+ *   tables    lookup tables indexed by READY / READUV
+ *   dither    not used by this routine
+ *
+ * Nothing is written when width or height is not positive.
+ *
+ * All `height` rows are converted.  The previous loop pre-decremented the
+ * row counter (a leftover from the 4:2:0 routine, which processes rows in
+ * pairs and handles the last one separately) and therefore never converted
+ * the final row.
+ */
+void yuv444_2_rgb8888(uint8_t  *dst_ptr,
+                const uint8_t  *y_ptr,
+                const uint8_t  *u_ptr,
+                const uint8_t  *v_ptr,
+                      int32_t   width,
+                      int32_t   height,
+                      int32_t   y_span,
+                      int32_t   uv_span,
+                      int32_t   dst_span,
+                const uint32_t *tables,
+                      int32_t   dither)
+{
+    int32_t row;
+
+    (void)dither;
+
+    if (width <= 0 || height <= 0)
+        return;
+
+    for (row = 0; row < height; row++)
+    {
+        int32_t col;
+
+        for (col = 0; col < width; col++)
+        {
+            /* One chroma sample and one luma sample per pixel */
+            uint32_t uv, y0;
+
+            uv = READUV(*u_ptr++,*v_ptr++);
+            y0 = uv + READY(*y_ptr++);
+            FIXUP(y0);
+            STORE(y0, dst_ptr);
+        }
+        /* Move every pointer from the end of this row to the start of the
+         * next one */
+        dst_ptr += dst_span-width*4;
+        y_ptr   += y_span-width;
+        u_ptr   += uv_span-width;
+        v_ptr   += uv_span-width;
+    }
+}

+ 2 - 2
src/zgl_main.pas

@@ -48,8 +48,8 @@ uses
   zgl_types;
   zgl_types;
 
 
 const
 const
-  cs_ZenGL    = 'ZenGL 0.3.6';
-  cs_Date     = '2012.12.16';
+  cs_ZenGL    = 'ZenGL 0.3.6 [ End of the World ]';
+  cs_Date     = '2012.12.21';
   cv_major    = 0;
   cv_major    = 0;
   cv_minor    = 3;
   cv_minor    = 3;
   cv_revision = 6;
   cv_revision = 6;

+ 26 - 26
src/zgl_video_theora.pas

@@ -27,6 +27,8 @@ unit zgl_video_theora;
 
 
 {$L yuv2bgr16tab}
 {$L yuv2bgr16tab}
 {$L yuv420rgb8888}
 {$L yuv420rgb8888}
+{$L yuv422rgb8888}
+{$L yuv444rgb8888}
 
 
 interface
 interface
 
 
@@ -56,8 +58,10 @@ type
     Time        : Double;
     Time        : Double;
   end;
   end;
 
 
+function  get_yuv2bgr565_table : pcuint32; cdecl; external;
 procedure yuv420_2_rgb8888( dst_ptr, y_ptr, u_ptr, v_ptr : pcuchar; width, height, y_span, uv_span, dst_span : cint32; tables : pcuint32; dither : cint32 ); cdecl; external;
 procedure yuv420_2_rgb8888( dst_ptr, y_ptr, u_ptr, v_ptr : pcuchar; width, height, y_span, uv_span, dst_span : cint32; tables : pcuint32; dither : cint32 ); cdecl; external;
-function get_yuv2bgr565_table : pcuint32; cdecl; external;
+procedure yuv422_2_rgb8888( dst_ptr, y_ptr, u_ptr, v_ptr : pcuchar; width, height, y_span, uv_span, dst_span : cint32; tables : pcuint32; dither : cint32 ); cdecl; external;
+procedure yuv444_2_rgb8888( dst_ptr, y_ptr, u_ptr, v_ptr : pcuchar; width, height, y_span, uv_span, dst_span : cint32; tables : pcuint32; dither : cint32 ); cdecl; external;
 
 
 var
 var
   theoraDecoderOGV : zglTVideoDecoder;
   theoraDecoderOGV : zglTVideoDecoder;
@@ -176,8 +180,8 @@ begin
     begin
     begin
       TheoraData.DecoderCtx := th_decode_alloc( @TheoraData.TheoraInfo, setupInfo );
       TheoraData.DecoderCtx := th_decode_alloc( @TheoraData.TheoraInfo, setupInfo );
 
 
-      Width     := TheoraData.TheoraInfo.frame_width;
-      Height    := TheoraData.TheoraInfo.frame_height;
+      Width     := TheoraData.TheoraInfo.pic_width;
+      Height    := TheoraData.TheoraInfo.pic_height;
       FrameRate := TheoraData.TheoraInfo.fps_numerator / TheoraData.TheoraInfo.fps_denominator;
       FrameRate := TheoraData.TheoraInfo.fps_numerator / TheoraData.TheoraInfo.fps_denominator;
       Duration  := 0;
       Duration  := 0;
       Frames    := 0;
       Frames    := 0;
@@ -219,26 +223,9 @@ begin
     end else
     end else
       Result := FALSE;
       Result := FALSE;
 
 
-  if TheoraData.TheoraInfo.pixel_fmt <> TH_PF_420 Then
-    begin
-      log_Add( 'Theora: Pixel format is not supported(YUV 4:2:0 is needed)' );
-      Result := FALSE;
-    end;
-
   th_comment_clear( @comment );
   th_comment_clear( @comment );
 end;
 end;
 
 
-function clamp( value : Integer ) : Integer; {$IFDEF USE_INLINE} inline; {$ENDIF}
-begin
-  if value > 2088960 Then
-    Result := 2088960
-  else
-    if value < 0 Then
-      Result := 0
-    else
-      Result := value;
-end;
-
 function theora_Update( var TheoraData : zglTTheoraData; Time : Double; Data : PByteArray ) : Integer;
 function theora_Update( var TheoraData : zglTTheoraData; Time : Double; Data : PByteArray ) : Integer;
   var
   var
     ycbcr      : th_ycbcr_buffer;
     ycbcr      : th_ycbcr_buffer;
@@ -280,12 +267,25 @@ begin
         th_decode_ycbcr_out( TheoraData.DecoderCtx, @ycbcr );
         th_decode_ycbcr_out( TheoraData.DecoderCtx, @ycbcr );
 
 
         dataOrig := Data;
         dataOrig := Data;
-        INC( PByte( Data ), ( ycbcr[ 0 ].height - 1 ) * ycbcr[ 0 ].width * 4 );
-
-        yuv420_2_rgb8888( pcuchar( Data ), ycbcr[ 0 ].data, ycbcr[ 1 ].data, ycbcr[ 2 ].data,
-                          ycbcr[ 0 ].width, ycbcr[ 0 ].height,
-                          ycbcr[ 0 ].stride, ycbcr[ 1 ].stride, -ycbcr[ 0 ].width * 4,
-                          get_yuv2bgr565_table(), 0 );
+        INC( PByte( Data ), ( TheoraData.TheoraInfo.pic_height - 1 ) * TheoraData.TheoraInfo.pic_width * 4 );
+
+        case TheoraData.TheoraInfo.pixel_fmt of
+          TH_PF_420:
+            yuv420_2_rgb8888( pcuchar( Data ), ycbcr[ 0 ].data, ycbcr[ 1 ].data, ycbcr[ 2 ].data,
+                              TheoraData.TheoraInfo.pic_width, TheoraData.TheoraInfo.pic_height,
+                              ycbcr[ 0 ].stride, ycbcr[ 1 ].stride, -TheoraData.TheoraInfo.pic_width * 4,
+                              get_yuv2bgr565_table(), 0 );
+          TH_PF_422:
+            yuv422_2_rgb8888( pcuchar( Data ), ycbcr[ 0 ].data, ycbcr[ 1 ].data, ycbcr[ 2 ].data,
+                              TheoraData.TheoraInfo.pic_width, TheoraData.TheoraInfo.pic_height,
+                              ycbcr[ 0 ].stride, ycbcr[ 1 ].stride, -TheoraData.TheoraInfo.pic_width * 4,
+                              get_yuv2bgr565_table(), 0 );
+          TH_PF_444:
+            yuv444_2_rgb8888( pcuchar( Data ), ycbcr[ 0 ].data, ycbcr[ 1 ].data, ycbcr[ 2 ].data,
+                              TheoraData.TheoraInfo.pic_width, TheoraData.TheoraInfo.pic_height,
+                              ycbcr[ 0 ].stride, ycbcr[ 1 ].stride, -TheoraData.TheoraInfo.pic_width * 4,
+                              get_yuv2bgr565_table(), 0 );
+        end;
 
 
         {$IFDEF ANDROID}
         {$IFDEF ANDROID}
         INC( PByte( dataOrig ), 3 );
         INC( PByte( dataOrig ), 3 );