
* GAS no longer required to build hermes on i386

git-svn-id: trunk@16827 -
nickysn committed 14 years ago (commit c96b89adce)
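
In short, the i386 .as sources that GAS previously assembled (driven by the Makefile's fpc_loaders rules and linked in with {$L}) become Pascal include files whose routines are inline assembler procedures, parsed by FPC's built-in AT&T-mode assembler. A condensed before/after sketch of the pattern, using ClearMMX_24 (whose body really is empty) and the PHermesClearInterface/ExternalAsmPrefix names from the Hermes sources below:

{ Before: GAS assembles mmx_clr.as into mmx_clr.o at build time; }
{ the object file is linked in and its symbol imported by name:  }
{$L mmx_clr}
procedure ClearMMX_24(hci: PHermesClearInterface); cdecl;
  external name ExternalAsmPrefix+'ClearMMX_24';

{ After: the same routine lives in mmx_clr.inc as an assembler   }
{ procedure, handled entirely by FPC's internal assembler:       }
{$ASMMODE att}
procedure ClearMMX_24(hci: PHermesClearInterface); cdecl; assembler;
asm
end;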

+ 15 - 15
.gitattributes

@@ -3405,21 +3405,21 @@ packages/hermes/src/hermes_format.inc svneol=native#text/plain
 packages/hermes/src/hermes_list.inc svneol=native#text/plain
 packages/hermes/src/hermes_palette.inc svneol=native#text/plain
 packages/hermes/src/hermes_utility.inc svneol=native#text/plain
-packages/hermes/src/i386/headi386.inc -text
-packages/hermes/src/i386/headmmx.inc -text
-packages/hermes/src/i386/mmx_clr.as -text
-packages/hermes/src/i386/mmx_main.as -text
-packages/hermes/src/i386/mmxp2_32.as -text
-packages/hermes/src/i386/mmxp_32.as -text
-packages/hermes/src/i386/x8616lut.as -text
-packages/hermes/src/i386/x86_clr.as -text
-packages/hermes/src/i386/x86_main.as -text
-packages/hermes/src/i386/x86p_16.as -text
-packages/hermes/src/i386/x86p_32.as -text
-packages/hermes/src/i386/x86p_cpy.as -text
-packages/hermes/src/i386/x86p_i8.as -text
-packages/hermes/src/i386/x86p_s32.as -text
-packages/hermes/src/i386/x86pscpy.as -text
+packages/hermes/src/i386/headi386.inc svneol=native#text/plain
+packages/hermes/src/i386/headmmx.inc svneol=native#text/plain
+packages/hermes/src/i386/mmx_clr.inc svneol=native#text/plain
+packages/hermes/src/i386/mmx_main.inc svneol=native#text/plain
+packages/hermes/src/i386/mmxp2_32.inc svneol=native#text/plain
+packages/hermes/src/i386/mmxp_32.inc svneol=native#text/plain
+packages/hermes/src/i386/x8616lut.inc svneol=native#text/plain
+packages/hermes/src/i386/x86_clr.inc svneol=native#text/plain
+packages/hermes/src/i386/x86_main.inc svneol=native#text/plain
+packages/hermes/src/i386/x86p_16.inc svneol=native#text/plain
+packages/hermes/src/i386/x86p_32.inc svneol=native#text/plain
+packages/hermes/src/i386/x86p_cpy.inc svneol=native#text/plain
+packages/hermes/src/i386/x86p_i8.inc svneol=native#text/plain
+packages/hermes/src/i386/x86p_s32.inc svneol=native#text/plain
+packages/hermes/src/i386/x86pscpy.inc svneol=native#text/plain
 packages/hermes/src/p_16.inc svneol=native#text/plain
 packages/hermes/src/p_24.inc svneol=native#text/plain
 packages/hermes/src/p_32.inc svneol=native#text/plain

+ 1 - 72
packages/hermes/Makefile

@@ -1,5 +1,5 @@
 #
-# Don't edit, this file is generated by FPCMake Version 2.0.0 [2011/01/03]
+# Don't edit, this file is generated by FPCMake Version 2.0.0 [2011/01/28]
 #
 default: all
 MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-solaris x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian powerpc64-linux powerpc64-darwin powerpc64-embedded avr-embedded armeb-linux armeb-embedded mipsel-linux
@@ -452,24 +452,6 @@ endif
 ifeq ($(FULL_TARGET),mipsel-linux)
 override TARGET_UNITS+=hermes
 endif
-ifeq ($(FULL_TARGET),i386-linux)
-override TARGET_LOADERS+=mmx_clr mmx_main mmxp2_32 mmxp_32 x8616lut x86_clr x86_main x86p_16 x86p_32 x86p_cpy x86p_i8 x86p_s32 x86pscpy
-endif
-ifeq ($(FULL_TARGET),i386-go32v2)
-override TARGET_LOADERS+=mmx_clr mmx_main mmxp2_32 mmxp_32 x8616lut x86_clr x86_main x86p_16 x86p_32 x86p_cpy x86p_i8 x86p_s32 x86pscpy
-endif
-ifeq ($(FULL_TARGET),i386-win32)
-override TARGET_LOADERS+=mmx_clr mmx_main mmxp2_32 mmxp_32 x8616lut x86_clr x86_main x86p_16 x86p_32 x86p_cpy x86p_i8 x86p_s32 x86pscpy
-endif
-ifeq ($(FULL_TARGET),i386-freebsd)
-override TARGET_LOADERS+=mmx_clr mmx_main mmxp2_32 mmxp_32 x8616lut x86_clr x86_main x86p_16 x86p_32 x86p_cpy x86p_i8 x86p_s32 x86pscpy
-endif
-ifeq ($(FULL_TARGET),i386-beos)
-override TARGET_LOADERS+=mmx_clr mmx_main mmxp2_32 mmxp_32 x8616lut x86_clr x86_main x86p_16 x86p_32 x86p_cpy x86p_i8 x86p_s32 x86pscpy
-endif
-ifeq ($(FULL_TARGET),i386-haiku)
-override TARGET_LOADERS+=mmx_clr mmx_main mmxp2_32 mmxp_32 x8616lut x86_clr x86_main x86p_16 x86p_32 x86p_cpy x86p_i8 x86p_s32 x86pscpy
-endif
 override INSTALL_FPCPACKAGE=y
 ifeq ($(FULL_TARGET),i386-linux)
 override COMPILER_OPTIONS+=-dI386_ASSEMBLER
@@ -2183,33 +2165,6 @@ EXECPPAS:=@$(PPAS)
 endif
 endif
 endif
-.PHONY: fpc_loaders
-ifneq ($(TARGET_LOADERS),)
-override ALLTARGET+=fpc_loaders
-override CLEANTARGET+=fpc_loaders_clean
-override INSTALLTARGET+=fpc_loaders_install
-override LOADEROFILES:=$(addsuffix $(OEXT),$(TARGET_LOADERS))
-endif
-%$(OEXT): %$(LOADEREXT)
-ifdef COMPILER_UNITTARGETDIR
-	$(AS) -o $(COMPILER_UNITTARGETDIR)/$*$(OEXT) $<
-else
-	$(AS) -o $*$(OEXT) $<
-endif
-fpc_loaders: $(COMPILER_UNITTARGETDIR) $(LOADEROFILES)
-fpc_loaders_clean:
-ifdef COMPILER_UNITTARGETDIR
-	-$(DEL) $(addprefix $(COMPILER_UNITTARGETDIR)/,$(LOADEROFILES))
-else
-	-$(DEL) $(LOADEROFILES)
-endif
-fpc_loaders_install:
-	$(MKDIR) $(INSTALL_UNITDIR)
-ifdef COMPILER_UNITTARGETDIR
-	$(INSTALL) $(addprefix $(COMPILER_UNITTARGETDIR)/,$(LOADEROFILES)) $(INSTALL_UNITDIR)
-else
-	$(INSTALL) $(LOADEROFILES) $(INSTALL_UNITDIR)
-endif
 .PHONY: fpc_units
 ifneq ($(TARGET_UNITS)$(TARGET_IMPLICITUNITS),)
 override ALLTARGET+=fpc_units
@@ -2657,29 +2612,3 @@ ifneq ($(wildcard fpcmake.loc),)
 include fpcmake.loc
 endif
 .NOTPARALLEL:
-mmx_clr$(OEXT): src/$(CPU_TARGET)/mmx_clr.as
-	$(AS) --32 -o $(UNITTARGETDIRPREFIX)mmx_clr$(OEXT) src/$(CPU_TARGET)/mmx_clr.as
-mmx_main$(OEXT): src/$(CPU_TARGET)/mmx_main.as
-	$(AS) --32 -o $(UNITTARGETDIRPREFIX)mmx_main$(OEXT) src/$(CPU_TARGET)/mmx_main.as
-mmxp2_32$(OEXT): src/$(CPU_TARGET)/mmxp2_32.as
-	$(AS) --32 -o $(UNITTARGETDIRPREFIX)mmxp2_32$(OEXT) src/$(CPU_TARGET)/mmxp2_32.as
-mmxp_32$(OEXT): src/$(CPU_TARGET)/mmxp_32.as
-	$(AS) --32 -o $(UNITTARGETDIRPREFIX)mmxp_32$(OEXT) src/$(CPU_TARGET)/mmxp_32.as
-x8616lut$(OEXT): src/$(CPU_TARGET)/x8616lut.as
-	$(AS) --32 -o $(UNITTARGETDIRPREFIX)x8616lut$(OEXT) src/$(CPU_TARGET)/x8616lut.as
-x86_clr$(OEXT): src/$(CPU_TARGET)/x86_clr.as
-	$(AS) --32 -o $(UNITTARGETDIRPREFIX)x86_clr$(OEXT) src/$(CPU_TARGET)/x86_clr.as
-x86_main$(OEXT): src/$(CPU_TARGET)/x86_main.as
-	$(AS) --32 -o $(UNITTARGETDIRPREFIX)x86_main$(OEXT) src/$(CPU_TARGET)/x86_main.as
-x86p_16$(OEXT): src/$(CPU_TARGET)/x86p_16.as
-	$(AS) --32 -o $(UNITTARGETDIRPREFIX)x86p_16$(OEXT) src/$(CPU_TARGET)/x86p_16.as
-x86p_32$(OEXT): src/$(CPU_TARGET)/x86p_32.as
-	$(AS) --32 -o $(UNITTARGETDIRPREFIX)x86p_32$(OEXT) src/$(CPU_TARGET)/x86p_32.as
-x86p_cpy$(OEXT): src/$(CPU_TARGET)/x86p_cpy.as
-	$(AS) --32 -o $(UNITTARGETDIRPREFIX)x86p_cpy$(OEXT) src/$(CPU_TARGET)/x86p_cpy.as
-x86p_i8$(OEXT): src/$(CPU_TARGET)/x86p_i8.as
-	$(AS) --32 -o $(UNITTARGETDIRPREFIX)x86p_i8$(OEXT) src/$(CPU_TARGET)/x86p_i8.as
-x86p_s32$(OEXT): src/$(CPU_TARGET)/x86p_s32.as
-	$(AS) --32 -o $(UNITTARGETDIRPREFIX)x86p_s32$(OEXT) src/$(CPU_TARGET)/x86p_s32.as
-x86pscpy$(OEXT): src/$(CPU_TARGET)/x86pscpy.as
-	$(AS) --32 -o $(UNITTARGETDIRPREFIX)x86pscpy$(OEXT) src/$(CPU_TARGET)/x86pscpy.as

+ 0 - 56
packages/hermes/Makefile.fpc

@@ -8,24 +8,6 @@ version=2.5.1
 
 [target]
 units=hermes
-loaders_i386_linux=mmx_clr mmx_main mmxp2_32 mmxp_32 \
-             x8616lut x86_clr x86_main x86p_16 x86p_32 x86p_cpy \
-             x86p_i8 x86p_s32 x86pscpy
-loaders_i386_win32=mmx_clr mmx_main mmxp2_32 mmxp_32 \
-             x8616lut x86_clr x86_main x86p_16 x86p_32 x86p_cpy \
-             x86p_i8 x86p_s32 x86pscpy
-loaders_i386_go32v2=mmx_clr mmx_main mmxp2_32 mmxp_32 \
-             x8616lut x86_clr x86_main x86p_16 x86p_32 x86p_cpy \
-             x86p_i8 x86p_s32 x86pscpy
-loaders_i386_freebsd=mmx_clr mmx_main mmxp2_32 mmxp_32 \
-             x8616lut x86_clr x86_main x86p_16 x86p_32 x86p_cpy \
-             x86p_i8 x86p_s32 x86pscpy
-loaders_i386_haiku=mmx_clr mmx_main mmxp2_32 mmxp_32 \
-             x8616lut x86_clr x86_main x86p_16 x86p_32 x86p_cpy \
-             x86p_i8 x86p_s32 x86pscpy
-loaders_i386_beos=mmx_clr mmx_main mmxp2_32 mmxp_32 \
-             x8616lut x86_clr x86_main x86p_16 x86p_32 x86p_cpy \
-             x86p_i8 x86p_s32 x86pscpy
 
 [compiler]
 options_i386_linux=-dI386_ASSEMBLER
@@ -48,41 +30,3 @@ fpcdir=../..
 
 [rules]
 .NOTPARALLEL:
-mmx_clr$(OEXT): src/$(CPU_TARGET)/mmx_clr.as
-        $(AS) --32 -o $(UNITTARGETDIRPREFIX)mmx_clr$(OEXT) src/$(CPU_TARGET)/mmx_clr.as
-
-mmx_main$(OEXT): src/$(CPU_TARGET)/mmx_main.as
-        $(AS) --32 -o $(UNITTARGETDIRPREFIX)mmx_main$(OEXT) src/$(CPU_TARGET)/mmx_main.as
-
-mmxp2_32$(OEXT): src/$(CPU_TARGET)/mmxp2_32.as
-        $(AS) --32 -o $(UNITTARGETDIRPREFIX)mmxp2_32$(OEXT) src/$(CPU_TARGET)/mmxp2_32.as
-
-mmxp_32$(OEXT): src/$(CPU_TARGET)/mmxp_32.as
-        $(AS) --32 -o $(UNITTARGETDIRPREFIX)mmxp_32$(OEXT) src/$(CPU_TARGET)/mmxp_32.as
-
-x8616lut$(OEXT): src/$(CPU_TARGET)/x8616lut.as
-        $(AS) --32 -o $(UNITTARGETDIRPREFIX)x8616lut$(OEXT) src/$(CPU_TARGET)/x8616lut.as
-
-x86_clr$(OEXT): src/$(CPU_TARGET)/x86_clr.as
-        $(AS) --32 -o $(UNITTARGETDIRPREFIX)x86_clr$(OEXT) src/$(CPU_TARGET)/x86_clr.as
-
-x86_main$(OEXT): src/$(CPU_TARGET)/x86_main.as
-        $(AS) --32 -o $(UNITTARGETDIRPREFIX)x86_main$(OEXT) src/$(CPU_TARGET)/x86_main.as
-
-x86p_16$(OEXT): src/$(CPU_TARGET)/x86p_16.as
-        $(AS) --32 -o $(UNITTARGETDIRPREFIX)x86p_16$(OEXT) src/$(CPU_TARGET)/x86p_16.as
-
-x86p_32$(OEXT): src/$(CPU_TARGET)/x86p_32.as
-        $(AS) --32 -o $(UNITTARGETDIRPREFIX)x86p_32$(OEXT) src/$(CPU_TARGET)/x86p_32.as
-
-x86p_cpy$(OEXT): src/$(CPU_TARGET)/x86p_cpy.as
-        $(AS) --32 -o $(UNITTARGETDIRPREFIX)x86p_cpy$(OEXT) src/$(CPU_TARGET)/x86p_cpy.as
-
-x86p_i8$(OEXT): src/$(CPU_TARGET)/x86p_i8.as
-        $(AS) --32 -o $(UNITTARGETDIRPREFIX)x86p_i8$(OEXT) src/$(CPU_TARGET)/x86p_i8.as
-
-x86p_s32$(OEXT): src/$(CPU_TARGET)/x86p_s32.as
-        $(AS) --32 -o $(UNITTARGETDIRPREFIX)x86p_s32$(OEXT) src/$(CPU_TARGET)/x86p_s32.as
-
-x86pscpy$(OEXT): src/$(CPU_TARGET)/x86pscpy.as
-        $(AS) --32 -o $(UNITTARGETDIRPREFIX)x86pscpy$(OEXT) src/$(CPU_TARGET)/x86pscpy.as

+ 9 - 51
packages/hermes/src/i386/headi386.inc

@@ -29,54 +29,12 @@
     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 }
 
-{$L x8616lut}
-{$L x86_clr}
-{$L x86_main}
-{$L x86p_16}
-{$L x86p_32}
-{$L x86p_cpy}
-{$L x86p_i8}
-{$L x86p_s32}
-{$L x86pscpy}
-
-procedure ConvertX86(hci: PHermesConverterInterface); cdecl; external name ExternalAsmPrefix+'ConvertX86';
-procedure ConvertX86Stretch(hci: PHermesConverterInterface); cdecl; external name ExternalAsmPrefix+'ConvertX86Stretch';
-procedure ClearX86_32(hci: PHermesClearInterface); cdecl; external name ExternalAsmPrefix+'ClearX86_32';
-procedure ClearX86_24(hci: PHermesClearInterface); cdecl; external name ExternalAsmPrefix+'ClearX86_24';
-procedure ClearX86_16(hci: PHermesClearInterface); cdecl; external name ExternalAsmPrefix+'ClearX86_16';
-procedure ClearX86_8(hci: PHermesClearInterface); cdecl; external name ExternalAsmPrefix+'ClearX86_8';
-
-function Hermes_X86_CPU: Integer; cdecl; external name ExternalAsmPrefix+'Hermes_X86_CPU';
-
-procedure ConvertX86p32_32BGR888(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86p32_32BGR888';
-procedure ConvertX86p32_32RGBA888(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86p32_32RGBA888';
-procedure ConvertX86p32_32BGRA888(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86p32_32BGRA888';
-procedure ConvertX86p32_24RGB888(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86p32_24RGB888';
-procedure ConvertX86p32_24BGR888(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86p32_24BGR888';
-procedure ConvertX86p32_16RGB565(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86p32_16RGB565';
-procedure ConvertX86p32_16BGR565(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86p32_16BGR565';
-procedure ConvertX86p32_16RGB555(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86p32_16RGB555';
-procedure ConvertX86p32_16BGR555(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86p32_16BGR555';
-procedure ConvertX86p32_8RGB332(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86p32_8RGB332';
-
-procedure ConvertX86p32_16RGB565_S(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86p32_16RGB565_S';
-
-procedure ConvertX86p16_32RGB888(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86p16_32RGB888';
-procedure ConvertX86p16_32BGR888(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86p16_32BGR888';
-procedure ConvertX86p16_32RGBA888(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86p16_32RGBA888';
-procedure ConvertX86p16_32BGRA888(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86p16_32BGRA888';
-procedure ConvertX86p16_24RGB888(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86p16_24RGB888';
-procedure ConvertX86p16_24BGR888(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86p16_24BGR888';
-procedure ConvertX86p16_16BGR565(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86p16_16BGR565';
-procedure ConvertX86p16_16RGB555(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86p16_16RGB555';
-procedure ConvertX86p16_16BGR555(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86p16_16BGR555';
-procedure ConvertX86p16_8RGB332(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86p16_8RGB332';
-
-procedure CopyX86p_4byte(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'CopyX86p_4byte';
-procedure CopyX86p_3byte(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'CopyX86p_3byte';
-procedure CopyX86p_2byte(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'CopyX86p_2byte';
-procedure CopyX86p_1byte(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'CopyX86p_1byte';
-
-procedure ConvertX86pI8_32(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86pI8_32';
-procedure ConvertX86pI8_24(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86pI8_24';
-procedure ConvertX86pI8_16(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertX86pI8_16';
+{$I x8616lut.inc}
+{$I x86_clr.inc}
+{$I x86_main.inc}
+{$I x86p_16.inc}
+{$I x86p_32.inc}
+{$I x86p_cpy.inc}
+{$I x86p_i8.inc}
+{$I x86p_s32.inc}
+{$I x86pscpy.inc}

+ 4 - 19
packages/hermes/src/i386/headmmx.inc

@@ -29,22 +29,7 @@
     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 }
 
-{$L mmx_clr}
-{$L mmx_main}
-{$L mmxp2_32}
-{$L mmxp_32}
-
-procedure ConvertMMX(hci: PHermesConverterInterface); cdecl; external name ExternalAsmPrefix+'ConvertMMX';
-
-procedure ClearMMX_32(hci: PHermesClearInterface); cdecl; external name ExternalAsmPrefix+'ClearMMX_32';
-procedure ClearMMX_24(hci: PHermesClearInterface); cdecl; external name ExternalAsmPrefix+'ClearMMX_24';
-procedure ClearMMX_16(hci: PHermesClearInterface); cdecl; external name ExternalAsmPrefix+'ClearMMX_16';
-procedure ClearMMX_8(hci: PHermesClearInterface); cdecl; external name ExternalAsmPrefix+'ClearMMX_8';
-
-procedure ConvertMMXpII32_24RGB888(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertMMXpII32_24RGB888';
-procedure ConvertMMXpII32_16RGB565(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertMMXpII32_16RGB565';
-procedure ConvertMMXpII32_16BGR565(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertMMXpII32_16BGR565';
-procedure ConvertMMXpII32_16RGB555(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertMMXpII32_16RGB555';
-procedure ConvertMMXpII32_16BGR555(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertMMXpII32_16BGR555';
-
-procedure ConvertMMXp32_16RGB555(CONVERT_PARAMETERS); cdecl; external name ExternalAsmPrefix+'ConvertMMXp32_16RGB555';
+{$I mmx_main.inc}
+{$I mmx_clr.inc}
+{$I mmxp2_32.inc}
+{$I mmxp_32.inc}

+ 0 - 269
packages/hermes/src/i386/mmx_clr.as

@@ -1,269 +0,0 @@
-#
-# MMX surface clear routines for HERMES
-# Copyright (c) 1998 Christian Nentwich ([email protected])
-# This source code is licensed under the GNU LGPL
-#
-# Please refer to the file COPYING.LIB contained in the distribution for
-# licensing conditions
-#
-
-
-.globl _ClearMMX_32
-.globl _ClearMMX_24
-.globl _ClearMMX_16
-.globl _ClearMMX_8
-
-.text
-
-##
-## --------------------------------------------------------------------------
-## HermesClearInterface (ebp+..)
-##   0: char8 *dest
-##   4: int32 value
-##   8: unsigned int width (already checked to be >0!)
-##  12: unsigned int height (already checked to be >0!)
-##  16: int add
-
-
-_ClearMMX_32:
-        pushl %ebp
-        movl %esp,%ebp
-
-        movl 8(%ebp),%ebp
-
-        movl 4(%ebp),%eax       # pixel value
-        movd 4(%ebp),%mm0
-
-        movl 12(%ebp),%edx      # height
-        movq %mm0,%mm1
-
-        psllq $32,%mm0
-        movl (%ebp),%edi        # destination
-
-        por %mm1,%mm0
-_ClearMMX_32.L_y:
-        movl 8(%ebp),%ecx
-
-        movl %ecx,%ebx
-
-        shrl %ecx
-        jz _ClearMMX_32.L_last
-
-_ClearMMX_32.L_x:
-        movq %mm0,(%edi)
-        addl $8,%edi
-
-        decl %ecx
-        jnz _ClearMMX_32.L_x
-
-
-_ClearMMX_32.L_last:
-        testl $1,%ebx
-        jz _ClearMMX_32.L_endline
-
-        movl %eax,(%edi)
-        addl $4,%edi
-
-_ClearMMX_32.L_endline:
-
-        addl 16(%ebp),%edi
-
-        decl %edx
-        jnz _ClearMMX_32.L_y
-
-        emms
-
-        popl %ebp
-        ret
-
-
-
-_ClearMMX_24:
-        ret
-
-
-
-_ClearMMX_16:
-        pushl %ebp
-        movl %esp,%ebp
-
-        movl 8(%ebp),%ebp
-
-        movl 4(%ebp),%eax       # pixel value
-        movl 4(%ebp),%ebx
-
-        movl 12(%ebp),%edx      # height
-        movl (%ebp),%edi        # destination
-
-        shll $16,%eax           # Duplicate pixel value
-        andl $0x0ffff,%ebx
-
-        orl %ebx,%eax
-
-        movd %eax,%mm0
-        movd %eax,%mm1
-
-        psllq $32,%mm0
-
-        por %mm1,%mm0
-_ClearMMX_16.L_y:
-        movl 8(%ebp),%ecx
-
-        testl $3,%edi           # Check if destination is aligned mod 4
-        jz _ClearMMX_16.L_aligned
-
-        movw %ax,(%edi)         # otherwise write one pixel
-        addl $2,%edi
-
-        decl %ecx
-        jz _ClearMMX_16.L_endline
-
-_ClearMMX_16.L_aligned:
-        movl %ecx,%ebx
-        shrl $2,%ecx
-
-        jz _ClearMMX_16.L_last
-
-_ClearMMX_16.L_x:
-        movq %mm0,(%edi)
-        addl $8,%edi
-
-        decl %ecx
-        jnz _ClearMMX_16.L_x
-
-_ClearMMX_16.L_last:
-        andl $3,%ebx
-        jz _ClearMMX_16.L_endline
-
-        movw %ax,(%edi)         # Write trailing pixels
-        addl $2,%edi
-        decl %ebx
-        jz _ClearMMX_16.L_endline
-
-        movw %ax,(%edi)
-        addl $2,%edi
-        decl %ebx
-        jz _ClearMMX_16.L_endline
-
-        movw %ax,(%edi)
-        addl $2,%edi
-        decl %ebx
-        jnz _ClearMMX_16.L_endline
-
-_ClearMMX_16.L_endline:
-        addl 16(%ebp),%edi
-
-        decl %edx
-        jnz _ClearMMX_16.L_y
-
-        emms
-
-        popl %ebp
-        ret
-
-
-
-## Clear8_x86 isnt optimised fully yet as it seems to be a tiny bit slower
-## than the C routine
-_ClearMMX_8:
-        pushl %ebp
-        movl %esp,%ebp
-
-        movl 8(%ebp),%ebp
-
-        movl 4(%ebp),%eax       # pixel value
-        movl 4(%ebp),%ebx
-
-        movl 12(%ebp),%edx      # height
-        andl $0x0ff,%ebx
-
-        shll $8,%eax            # Put the byte pixel value in all four bytes
-        movl (%ebp),%edi        # destination
-
-        movb %bl,%al
-        movb %bl,%bh
-
-        shll $16,%eax
-
-        movb %bh,%ah
-        movb %bl,%al
-
-        movd %eax,%mm0
-        movd %eax,%mm1
-
-        psllq $32,%mm0
-
-        por %mm1,%mm0
-
-_ClearMMX_8.L_y:
-        movl 8(%ebp),%ecx
-
-        testl $3,%edi           # Align mod 4
-        jz _ClearMMX_8.L_aligned
-
-        movl %edi,%ebx
-
-        andl $3,%ebx
-
-        movb %al,(%edi)         # Unrolled (copy & paste), align and jump
-        incl %edi               # if finished, faster than a loop...
-        decl %ecx
-        jz _ClearMMX_8.L_endline
-        decl %ebx
-        jz _ClearMMX_8.L_aligned
-
-        movb %al,(%edi)         # Second pixel
-        incl %edi
-        decl %ecx
-        jz _ClearMMX_8.L_endline
-        decl %ebx
-        jz _ClearMMX_8.L_aligned
-
-        movb %al,(%edi)         # Third pixel
-        incl %edi
-        decl %ecx
-        jz _ClearMMX_8.L_endline
-        decl %ebx
-        jz _ClearMMX_8.L_aligned
-
-_ClearMMX_8.L_aligned:
-        movl %ecx,%ebx          # Store ecx for later
-
-        shrl $3,%ecx            # We write 8 pixels at once
-        jz _ClearMMX_8.L_last
-
-_ClearMMX_8.L_x:
-        movq %mm0,(%edi)
-        addl $8,%edi
-
-        decl %ecx
-        jnz _ClearMMX_8.L_x
-
-_ClearMMX_8.L_last:
-        movl %ebx,%ecx          # Clean up trailing pixels
-
-        andl $7,%ecx            # Could be up to 7 left
-        jz _ClearMMX_8.L_endline
-
-        testb $0b100,%cl        # If theres less than four jump
-        jz _ClearMMX_8.L_lessthanfour
-
-        movl %eax,(%edi)        # Otherwise write a dword
-        addl $4,%edi
-
-        subl $4,%ecx
-
-_ClearMMX_8.L_lessthanfour:
-        rep
- stosb              # Clean up the very rest
-
-_ClearMMX_8.L_endline:
-        addl 16(%ebp),%edi
-
-        decl %edx
-        jnz _ClearMMX_8.L_y
-
-        emms
-
-        popl %ebp
-        ret

+ 276 - 0
packages/hermes/src/i386/mmx_clr.inc

@@ -0,0 +1,276 @@
+{
+    MMX surface clear routines for HERMES
+    Copyright (c) 1998 Christian Nentwich ([email protected])
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version
+    with the following modification:
+
+    As a special exception, the copyright holders of this library give you
+    permission to link this library with independent modules to produce an
+    executable, regardless of the license terms of these independent modules,and
+    to copy and distribute the resulting executable under terms of your choice,
+    provided that you also meet, for each linked independent module, the terms
+    and conditions of the license of that module. An independent module is a
+    module which is not derived from or based on this library. If you modify
+    this library, you may extend this exception to your version of the library,
+    but you are not obligated to do so. If you do not wish to do so, delete this
+    exception statement from your version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+}
+
+{$ASMMODE att}
+
+{
+ --------------------------------------------------------------------------
+ HermesClearInterface (ebp+..)
+   0: char8 *dest
+   4: int32 value
+   8: unsigned int width (already checked to be >0!)
+  12: unsigned int height (already checked to be >0!)
+  16: int add
+}
+procedure ClearMMX_32(hci: PHermesClearInterface); cdecl; assembler;
+asm
+        pushl %ebp
+        movl 8(%ebp),%ebp
+
+        movl 4(%ebp),%eax       // pixel value
+        movd 4(%ebp),%mm0
+
+        movl 12(%ebp),%edx      // height
+        movq %mm0,%mm1
+
+        psllq $32,%mm0
+        movl (%ebp),%edi        // destination
+
+        por %mm1,%mm0
+.L_y:
+        movl 8(%ebp),%ecx
+
+        movl %ecx,%ebx
+
+        shrl $1,%ecx
+        jz .L_last
+
+.L_x:
+        movq %mm0,(%edi)
+        addl $8,%edi
+
+        decl %ecx
+        jnz .L_x
+
+
+.L_last:
+        testl $1,%ebx
+        jz .L_endline
+
+        movl %eax,(%edi)
+        addl $4,%edi
+
+.L_endline:
+
+        addl 16(%ebp),%edi
+
+        decl %edx
+        jnz .L_y
+
+        emms
+        popl %ebp
+end;
+
+procedure ClearMMX_24(hci: PHermesClearInterface); cdecl; assembler;
+asm
+end;
+
+procedure ClearMMX_16(hci: PHermesClearInterface); cdecl; assembler;
+asm
+        pushl %ebp
+        movl 8(%ebp),%ebp
+
+        movl 4(%ebp),%eax       // pixel value
+        movl 4(%ebp),%ebx
+
+        movl 12(%ebp),%edx      // height
+        movl (%ebp),%edi        // destination
+
+        shll $16,%eax           // Duplicate pixel value
+        andl $0x0ffff,%ebx
+
+        orl %ebx,%eax
+
+        movd %eax,%mm0
+        movd %eax,%mm1
+
+        psllq $32,%mm0
+
+        por %mm1,%mm0
+.L_y:
+        movl 8(%ebp),%ecx
+
+        testl $3,%edi           // Check if destination is aligned mod 4
+        jz .L_aligned
+
+        movw %ax,(%edi)         // otherwise write one pixel
+        addl $2,%edi
+
+        decl %ecx
+        jz .L_endline
+
+.L_aligned:
+        movl %ecx,%ebx
+        shrl $2,%ecx
+
+        jz .L_last
+
+.L_x:
+        movq %mm0,(%edi)
+        addl $8,%edi
+
+        decl %ecx
+        jnz .L_x
+
+.L_last:
+        andl $3,%ebx
+        jz .L_endline
+
+        movw %ax,(%edi)         // Write trailing pixels
+        addl $2,%edi
+        decl %ebx
+        jz .L_endline
+
+        movw %ax,(%edi)
+        addl $2,%edi
+        decl %ebx
+        jz .L_endline
+
+        movw %ax,(%edi)
+        addl $2,%edi
+        decl %ebx
+        jnz .L_endline
+
+.L_endline:
+        addl 16(%ebp),%edi
+
+        decl %edx
+        jnz .L_y
+
+        emms
+        popl %ebp
+end;
+
+
+{
+ Clear8_x86 isnt optimised fully yet as it seems to be a tiny bit slower
+ than the C routine
+}
+procedure ClearMMX_8(hci: PHermesClearInterface); cdecl; assembler;
+asm
+        pushl %ebp
+        movl 8(%ebp),%ebp
+
+        movl 4(%ebp),%eax       // pixel value
+        movl 4(%ebp),%ebx
+
+        movl 12(%ebp),%edx      // height
+        andl $0x0ff,%ebx
+
+        shll $8,%eax            // Put the byte pixel value in all four bytes
+        movl (%ebp),%edi        // destination
+
+        movb %bl,%al
+        movb %bl,%bh
+
+        shll $16,%eax
+
+        movb %bh,%ah
+        movb %bl,%al
+
+        movd %eax,%mm0
+        movd %eax,%mm1
+
+        psllq $32,%mm0
+
+        por %mm1,%mm0
+
+.L_y:
+        movl 8(%ebp),%ecx
+
+        testl $3,%edi           // Align mod 4
+        jz .L_aligned
+
+        movl %edi,%ebx
+
+        andl $3,%ebx
+
+        movb %al,(%edi)         // Unrolled (copy & paste), align and jump
+        incl %edi               // if finished, faster than a loop...
+        decl %ecx
+        jz .L_endline
+        decl %ebx
+        jz .L_aligned
+
+        movb %al,(%edi)         // Second pixel
+        incl %edi
+        decl %ecx
+        jz .L_endline
+        decl %ebx
+        jz .L_aligned
+
+        movb %al,(%edi)         // Third pixel
+        incl %edi
+        decl %ecx
+        jz .L_endline
+        decl %ebx
+        jz .L_aligned
+
+.L_aligned:
+        movl %ecx,%ebx          // Store ecx for later
+
+        shrl $3,%ecx            // We write 8 pixels at once
+        jz .L_last
+
+.L_x:
+        movq %mm0,(%edi)
+        addl $8,%edi
+
+        decl %ecx
+        jnz .L_x
+
+.L_last:
+        movl %ebx,%ecx          // Clean up trailing pixels
+
+        andl $7,%ecx            // Could be up to 7 left
+        jz .L_endline
+
+        testb $0b100,%cl        // If theres less than four jump
+        jz .L_lessthanfour
+
+        movl %eax,(%edi)        // Otherwise write a dword
+        addl $4,%edi
+
+        subl $4,%ecx
+
+.L_lessthanfour:
+        rep
+ stosb              // Clean up the very rest
+
+.L_endline:
+        addl 16(%ebp),%edi
+
+        decl %edx
+        jnz .L_y
+
+        emms
+        popl %ebp
+end;

+ 0 - 67
packages/hermes/src/i386/mmx_main.as

@@ -1,67 +0,0 @@
-#
-# x86 format converters for HERMES
-# Some routines Copyright (c) 1998 Christian Nentwich ([email protected])
-# This source code is licensed under the GNU LGPL
-#
-# Please refer to the file COPYING.LIB contained in the distribution for
-# licensing conditions
-#
-# Most routines are (c) Glenn Fiedler ([email protected]), used with permission
-#
-
-#BITS 32
-
-.globl _ConvertMMX
-.globl _mmxreturn
-
-
-.text
-
-## _ConvertMMX:
-## [ESP+8] ConverterInfo*
-## --------------------------------------------------------------------------
-## ConverterInfo (ebp+..)
-##   0: void *s_pixels
-##   4: int s_width
-##   8: int s_height
-##  12: int s_add
-##  16: void *d_pixels
-##  20: int d_width
-##  24: int d_height
-##  28: int d_add
-##  32: void (*converter_function)()
-##  36: int32 *lookup
-
-_ConvertMMX:
-        pushl %ebp
-        movl %esp,%ebp
-
-        movl 8(%ebp),%eax
-
-        cmpl $0,4(%eax)
-        je endconvert
-
-        movl %eax,%ebp
-
-        movl (%ebp),%esi
-        movl 16(%ebp),%edi
-
-y_loop:
-        movl 4(%ebp),%ecx
-
-        jmp *32(%ebp)
-
-_mmxreturn:
-        addl 12(%ebp),%esi
-        addl 28(%ebp),%edi
-
-        decl 8(%ebp)
-        jnz y_loop
-
-
-        popl %ebp
-
-endconvert:
-        emms
-
-        ret

+ 83 - 0
packages/hermes/src/i386/mmx_main.inc

@@ -0,0 +1,83 @@
+{
+    x86 format converters for HERMES
+    Some routines Copyright (c) 1998 Christian Nentwich ([email protected])
+    Most routines are (c) Glenn Fiedler ([email protected]), used with permission
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version
+    with the following modification:
+
+    As a special exception, the copyright holders of this library give you
+    permission to link this library with independent modules to produce an
+    executable, regardless of the license terms of these independent modules,and
+    to copy and distribute the resulting executable under terms of your choice,
+    provided that you also meet, for each linked independent module, the terms
+    and conditions of the license of that module. An independent module is a
+    module which is not derived from or based on this library. If you modify
+    this library, you may extend this exception to your version of the library,
+    but you are not obligated to do so. If you do not wish to do so, delete this
+    exception statement from your version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+}
+
+label
+  _MMXRETURN;
+
+{ _ConvertMMX:
+ [ESP+8] ConverterInfo*
+ --------------------------------------------------------------------------
+ ConverterInfo (ebp+..)
+   0: void *s_pixels
+   4: int s_width
+   8: int s_height
+  12: int s_add
+  16: void *d_pixels
+  20: int d_width
+  24: int d_height
+  28: int d_add
+  32: void (*converter_function)()
+  36: int32 *lookup
+}
+procedure ConvertMMX(hci: PHermesConverterInterface); cdecl; assembler;
+label
+  y_loop, endconvert;
+asm
+        movl 8(%ebp),%eax
+
+        cmpl $0,4(%eax)
+        je endconvert
+
+        pushl %ebp
+        movl %eax,%ebp
+
+        movl (%ebp),%esi
+        movl 16(%ebp),%edi
+
+y_loop:
+        movl 4(%ebp),%ecx
+
+        jmp *32(%ebp)
+
+_MMXRETURN:
+        addl 12(%ebp),%esi
+        addl 28(%ebp),%edi
+
+        decl 8(%ebp)
+        jnz y_loop
+
+
+        popl %ebp
+
+endconvert:
+        emms
+end;

+ 0 - 383
packages/hermes/src/i386/mmxp2_32.as

@@ -1,383 +0,0 @@
-#
-# pII-optimised MMX format converters for HERMES
-# Copyright (c) 1998 Christian Nentwich ([email protected])
-#   and (c) 1999 Jonathan Matthew ([email protected])
-# This source code is licensed under the GNU LGPL
-#
-# Please refer to the file COPYING.LIB contained in the distribution for
-# licensing conditions
-#
-# COPYRIGHT NOTICE
-#
-# This file partly contains code that is (c) Intel Corporation, specifically
-# the mode detection routine, and the converter to 15 bit (8 pixel
-# conversion routine from the mmx programming tutorial pages).
-#
-#
-# These routines aren't exactly pII optimised - it's just that as they
-# are, theyre terrible on p5 MMXs, but less so on pIIs.  Someone needs to
-# optimise them for p5 MMXs..
-
-#BITS 32
-
-
-.globl _ConvertMMXpII32_24RGB888
-.globl _ConvertMMXpII32_16RGB565
-.globl _ConvertMMXpII32_16BGR565
-.globl _ConvertMMXpII32_16RGB555
-.globl _ConvertMMXpII32_16BGR555
-
-.extern _mmxreturn
-
-.data
-
-.align 8
-
-## Constants for conversion routines
-
-mmx32_rgb888_mask: .long 0x00ffffff,0x00ffffff
-
-mmx32_rgb565_b: .long 0x000000f8,0x000000f8
-mmx32_rgb565_g: .long 0x0000fc00,0x0000fc00
-mmx32_rgb565_r: .long 0x00f80000,0x00f80000
-
-mmx32_rgb555_rb: .long 0x00f800f8,0x00f800f8
-mmx32_rgb555_g: .long 0x0000f800,0x0000f800
-mmx32_rgb555_mul: .long 0x20000008,0x20000008
-mmx32_bgr555_mul: .long 0x00082000,0x00082000
-
-
-
-.text
-
-_ConvertMMXpII32_24RGB888:
-
-        # set up mm6 as the mask, mm7 as zero
-        movq mmx32_rgb888_mask,%mm6
-        pxor %mm7,%mm7
-
-        movl %ecx,%edx                  # save ecx
-        andl $0x0fffffffc,%ecx          # clear lower two bits
-        jnz _ConvertMMXpII32_24RGB888.L1
-        jmp _ConvertMMXpII32_24RGB888.L2
-
-_ConvertMMXpII32_24RGB888.L1:
-
-        movq (%esi),%mm0                # A R G B a r g b
-        pand %mm6,%mm0                  # 0 R G B 0 r g b
-        movq 8(%esi),%mm1               # A R G B a r g b
-        pand %mm6,%mm1                  # 0 R G B 0 r g b
-
-        movq %mm0,%mm2                  # 0 R G B 0 r g b
-        punpckhdq %mm7,%mm2             # 0 0 0 0 0 R G B
-        punpckldq %mm7,%mm0             # 0 0 0 0 0 r g b
-        psllq $24,%mm2                  # 0 0 R G B 0 0 0
-        por %mm2,%mm0                   # 0 0 R G B r g b
-
-        movq %mm1,%mm3                  # 0 R G B 0 r g b
-        psllq $48,%mm3                  # g b 0 0 0 0 0 0
-        por %mm3,%mm0                   # g b R G B r g b
-
-        movq %mm1,%mm4                  # 0 R G B 0 r g b
-        punpckhdq %mm7,%mm4             # 0 0 0 0 0 R G B
-        punpckldq %mm7,%mm1             # 0 0 0 0 0 r g b
-        psrlq $16,%mm1                  # 0 0 0 R G B 0 r
-        psllq $8,%mm4                   # 0 0 0 0 R G B 0
-        por %mm4,%mm1                   # 0 0 0 0 R G B r
-
-        movq %mm0,(%edi)
-        addl $16,%esi
-        movd %mm1,8(%edi)
-        addl $12,%edi
-        subl $4,%ecx
-        jnz _ConvertMMXpII32_24RGB888.L1
-
-_ConvertMMXpII32_24RGB888.L2:
-        movl %edx,%ecx
-        andl $3,%ecx
-        jz _ConvertMMXpII32_24RGB888.L4
-_ConvertMMXpII32_24RGB888.L3:
-        movb (%esi),%al
-        movb 1(%esi),%bl
-        movb 2(%esi),%dl
-        movb %al,(%edi)
-        movb %bl,1(%edi)
-        movb %dl,2(%edi)
-        addl $4,%esi
-        addl $3,%edi
-        decl %ecx
-        jnz _ConvertMMXpII32_24RGB888.L3
-_ConvertMMXpII32_24RGB888.L4:
-        jmp _mmxreturn
-
-
-
-_ConvertMMXpII32_16RGB565:
-
-        # set up masks
-        movq mmx32_rgb565_b,%mm5
-        movq mmx32_rgb565_g,%mm6
-        movq mmx32_rgb565_r,%mm7
-
-        movl %ecx,%edx
-        shrl $2,%ecx
-        jnz _ConvertMMXpII32_16RGB565.L1
-        jmp _ConvertMMXpII32_16RGB565.L2 # not necessary at the moment, but doesnt hurt (much)
-
-_ConvertMMXpII32_16RGB565.L1:
-        movq (%esi),%mm0        # argb
-        movq %mm0,%mm1          # argb
-        pand %mm6,%mm0          # 00g0
-        movq %mm1,%mm3          # argb
-        pand %mm5,%mm1          # 000b
-        pand %mm7,%mm3          # 0r00
-        pslld $2,%mm1           # 0 0 000000bb bbb00000
-        por %mm1,%mm0           # 0 0 ggggggbb bbb00000
-        psrld $5,%mm0           # 0 0 00000ggg gggbbbbb
-
-        movq 8(%esi),%mm4       # argb
-        movq %mm4,%mm2          # argb
-        pand %mm6,%mm4          # 00g0
-        movq %mm2,%mm1          # argb
-        pand %mm5,%mm2          # 000b
-        pand %mm7,%mm1          # 0r00
-        pslld $2,%mm2           # 0 0 000000bb bbb00000
-        por %mm2,%mm4           # 0 0 ggggggbb bbb00000
-        psrld $5,%mm4           # 0 0 00000ggg gggbbbbb
-
-        packuswb %mm1,%mm3      # R 0 r 0
-        packssdw %mm4,%mm0      # as above.. ish
-        por %mm3,%mm0           # done.
-        movq %mm0,(%edi)
-
-        addl $16,%esi
-        addl $8,%edi
-        decl %ecx
-        jnz _ConvertMMXpII32_16RGB565.L1
-
-_ConvertMMXpII32_16RGB565.L2:
-        movl %edx,%ecx
-        andl $3,%ecx
-        jz _ConvertMMXpII32_16RGB565.L4
-_ConvertMMXpII32_16RGB565.L3:
-        movb (%esi),%al
-        movb 1(%esi),%bh
-        movb 2(%esi),%ah
-        shrb $3,%al
-        andl $0x0F81F,%eax         # BYTE?
-        shrl $5,%ebx
-        andl $0x07E0,%ebx          # BYTE?
-        addl %ebx,%eax
-        movb %al,(%edi)
-        movb %ah,1(%edi)
-        addl $4,%esi
-        addl $2,%edi
-        decl %ecx
-        jnz _ConvertMMXpII32_16RGB565.L3
-
-_ConvertMMXpII32_16RGB565.L4:
-        jmp _mmxreturn
-
-
-_ConvertMMXpII32_16BGR565:
-
-        movq mmx32_rgb565_r,%mm5
-        movq mmx32_rgb565_g,%mm6
-        movq mmx32_rgb565_b,%mm7
-
-        movl %ecx,%edx
-        shrl $2,%ecx
-        jnz _ConvertMMXpII32_16BGR565.L1
-        jmp _ConvertMMXpII32_16BGR565.L2
-
-_ConvertMMXpII32_16BGR565.L1:
-        movq (%esi),%mm0                # a r g b
-        movq %mm0,%mm1                  # a r g b
-        pand %mm6,%mm0                  # 0 0 g 0
-        movq %mm1,%mm3                  # a r g b
-        pand %mm5,%mm1                  # 0 r 0 0
-        pand %mm7,%mm3                  # 0 0 0 b
-
-        psllq $16,%mm3                  # 0 b 0 0
-        psrld $14,%mm1                  # 0 0 000000rr rrr00000
-        por %mm1,%mm0                   # 0 0 ggggggrr rrr00000
-        psrld $5,%mm0                   # 0 0 00000ggg gggrrrrr
-
-        movq 8(%esi),%mm4               # a r g b
-        movq %mm4,%mm2                  # a r g b
-        pand %mm6,%mm4                  # 0 0 g 0
-        movq %mm2,%mm1                  # a r g b
-        pand %mm5,%mm2                  # 0 r 0 0
-        pand %mm7,%mm1                  # 0 0 0 b
-
-        psllq $16,%mm1                  # 0 b 0 0
-        psrld $14,%mm2                  # 0 0 000000rr rrr00000
-        por %mm2,%mm4                   # 0 0 ggggggrr rrr00000
-        psrld $5,%mm4                   # 0 0 00000ggg gggrrrrr
-
-        packuswb %mm1,%mm3              # BBBBB000 00000000 bbbbb000 00000000
-        packssdw %mm4,%mm0              # 00000GGG GGGRRRRR 00000GGG GGGRRRRR
-        por %mm3,%mm0                   # BBBBBGGG GGGRRRRR bbbbbggg gggrrrrr
-        movq %mm0,(%edi)
-
-        addl $16,%esi
-        addl $8,%edi
-        decl %ecx
-        jnz _ConvertMMXpII32_16BGR565.L1
-
-_ConvertMMXpII32_16BGR565.L2:
-        andl $3,%edx
-        jz _ConvertMMXpII32_16BGR565.L4
-_ConvertMMXpII32_16BGR565.L3:
-        movb 2(%esi),%al
-        movb 1(%esi),%bh
-        movb (%esi),%ah
-        shrb $3,%al
-        andl $0x0F81F,%eax                 # BYTE ?
-        shrl $5,%ebx
-        andl $0x07E0,%ebx                  # BYTE ?
-        addl %ebx,%eax
-        movb %al,(%edi)
-        movb %ah,1(%edi)
-        addl $4,%esi
-        addl $2,%edi
-        decl %edx
-        jnz _ConvertMMXpII32_16BGR565.L3
-
-_ConvertMMXpII32_16BGR565.L4:
-        jmp _mmxreturn
-
-_ConvertMMXpII32_16BGR555:
-
-        # the 16BGR555 converter is identical to the RGB555 one,
-        # except it uses a different multiplier for the pmaddwd
-        # instruction.  cool huh.
-
-        movq mmx32_bgr555_mul,%mm7
-        jmp _convert_bgr555_cheat
-
-# This is the same as the Intel version.. they obviously went to
-# much more trouble to expand/coil the loop than I did, so theirs
-# would almost certainly be faster, even if only a little.
-# I did rename 'mmx32_rgb555_add' to 'mmx32_rgb555_mul', which is
-# (I think) a more accurate name..
-_ConvertMMXpII32_16RGB555:
-
-        movq mmx32_rgb555_mul,%mm7
-_convert_bgr555_cheat:
-        movq mmx32_rgb555_g,%mm6
-
-        movl %ecx,%edx                     # Save ecx
-
-        andl $0x0fffffff8,%ecx             # clear lower three bits
-        jnz _convert_bgr555_cheat.L_OK
-        jmp _convert_bgr555_cheat.L2
-
-_convert_bgr555_cheat.L_OK:
-
-        movq 8(%esi),%mm2
-
-        movq (%esi),%mm0
-        movq %mm2,%mm3
-
-        pand mmx32_rgb555_rb,%mm3
-        movq %mm0,%mm1
-
-        pand mmx32_rgb555_rb,%mm1
-        pmaddwd %mm7,%mm3
-
-        pmaddwd %mm7,%mm1
-        pand %mm6,%mm2
-
-_convert_bgr555_cheat.L1:
-        movq 24(%esi),%mm4
-        pand %mm6,%mm0
-
-        movq 16(%esi),%mm5
-        por %mm2,%mm3
-
-        psrld $6,%mm3
-        por %mm0,%mm1
-
-        movq %mm4,%mm0
-        psrld $6,%mm1
-
-        pand mmx32_rgb555_rb,%mm0
-        packssdw %mm3,%mm1
-
-        movq %mm5,%mm3
-        pmaddwd %mm7,%mm0
-
-        pand mmx32_rgb555_rb,%mm3
-        pand %mm6,%mm4
-
-        movq %mm1,(%edi)
-        pmaddwd %mm7,%mm3
-
-        addl $32,%esi
-        por %mm0,%mm4
-
-        pand %mm6,%mm5
-        psrld $6,%mm4
-
-        movq 8(%esi),%mm2
-        por %mm3,%mm5
-
-        movq (%esi),%mm0
-        psrld $6,%mm5
-
-        movq %mm2,%mm3
-        movq %mm0,%mm1
-
-        pand mmx32_rgb555_rb,%mm3
-        packssdw %mm4,%mm5
-
-        pand mmx32_rgb555_rb,%mm1
-        pand %mm6,%mm2
-
-        movq %mm5,8(%edi)
-        pmaddwd %mm7,%mm3
-
-        pmaddwd %mm7,%mm1
-        addl $16,%edi
-
-        subl $8,%ecx
-        jz _convert_bgr555_cheat.L2
-        jmp _convert_bgr555_cheat.L1
-
-
-_convert_bgr555_cheat.L2:
-        movl %edx,%ecx
-
-        andl $7,%ecx
-        jz _convert_bgr555_cheat.L4
-
-_convert_bgr555_cheat.L3:
-        movl (%esi),%ebx
-        addl $4,%esi
-
-        movl %ebx,%eax
-        movl %ebx,%edx
-
-        shrl $3,%eax
-        shrl $6,%edx
-
-        andl $0b0000000000011111,%eax
-        andl $0b0000001111100000,%edx
-
-        shrl $9,%ebx
-
-        orl %edx,%eax
-
-        andl $0b0111110000000000,%ebx
-
-        orl %ebx,%eax
-
-        movw %ax,(%edi)
-        addl $2,%edi
-
-        decl %ecx
-        jnz _convert_bgr555_cheat.L3
-
-_convert_bgr555_cheat.L4:
-        jmp _mmxreturn

+ 391 - 0
packages/hermes/src/i386/mmxp2_32.inc

@@ -0,0 +1,391 @@
+{
+    pII-optimised MMX format converters for HERMES
+    Copyright (c) 1998 Christian Nentwich ([email protected])
+      and (c) 1999 Jonathan Matthew ([email protected])
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version
+    with the following modification:
+
+    As a special exception, the copyright holders of this library give you
+    permission to link this library with independent modules to produce an
+    executable, regardless of the license terms of these independent modules,and
+    to copy and distribute the resulting executable under terms of your choice,
+    provided that you also meet, for each linked independent module, the terms
+    and conditions of the license of that module. An independent module is a
+    module which is not derived from or based on this library. If you modify
+    this library, you may extend this exception to your version of the library,
+    but you are not obligated to do so. If you do not wish to do so, delete this
+    exception statement from your version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+    COPYRIGHT NOTICE
+
+    This file partly contains code that is (c) Intel Corporation, specifically
+    the mode detection routine, and the converter to 15 bit (8 pixel
+    conversion routine from the mmx programming tutorial pages).
+
+    These routines aren't exactly pII optimised - it's just that as they
+    are, theyre terrible on p5 MMXs, but less so on pIIs.  Someone needs to
+    optimise them for p5 MMXs..
+}
+
+// Constants for conversion routines
+const
+  mmx32_rgb888_mask: QWord = $00ffffff00ffffff;
+
+  mmx32_rgb565_b: QWord = $000000f8000000f8;
+  mmx32_rgb565_g: QWord = $0000fc000000fc00;
+  mmx32_rgb565_r: QWord = $00f8000000f80000;
+
+  mmx32_rgb555_rb: QWord  = $00f800f800f800f8;
+  mmx32_rgb555_g: QWord   = $0000f8000000f800;
+  mmx32_rgb555_mul: QWord = $2000000820000008;
+  mmx32_bgr555_mul: QWord = $0008200000082000;
+
+procedure ConvertMMXpII32_24RGB888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+        // set up mm6 as the mask, mm7 as zero
+        movq mmx32_rgb888_mask,%mm6
+        pxor %mm7,%mm7
+
+        movl %ecx,%edx                  // save ecx
+        andl $0x0fffffffc,%ecx          // clear lower two bits
+        jnz .L1
+        jmp .L2
+
+.L1:
+
+        movq (%esi),%mm0                // A R G B a r g b
+        pand %mm6,%mm0                  // 0 R G B 0 r g b
+        movq 8(%esi),%mm1               // A R G B a r g b
+        pand %mm6,%mm1                  // 0 R G B 0 r g b
+
+        movq %mm0,%mm2                  // 0 R G B 0 r g b
+        punpckhdq %mm7,%mm2             // 0 0 0 0 0 R G B
+        punpckldq %mm7,%mm0             // 0 0 0 0 0 r g b
+        psllq $24,%mm2                  // 0 0 R G B 0 0 0
+        por %mm2,%mm0                   // 0 0 R G B r g b
+
+        movq %mm1,%mm3                  // 0 R G B 0 r g b
+        psllq $48,%mm3                  // g b 0 0 0 0 0 0
+        por %mm3,%mm0                   // g b R G B r g b
+
+        movq %mm1,%mm4                  // 0 R G B 0 r g b
+        punpckhdq %mm7,%mm4             // 0 0 0 0 0 R G B
+        punpckldq %mm7,%mm1             // 0 0 0 0 0 r g b
+        psrlq $16,%mm1                  // 0 0 0 R G B 0 r
+        psllq $8,%mm4                   // 0 0 0 0 R G B 0
+        por %mm4,%mm1                   // 0 0 0 0 R G B r
+
+        movq %mm0,(%edi)
+        addl $16,%esi
+        movd %mm1,8(%edi)
+        addl $12,%edi
+        subl $4,%ecx
+        jnz .L1
+
+.L2:
+        movl %edx,%ecx
+        andl $3,%ecx
+        jz .L4
+.L3:
+        movb (%esi),%al
+        movb 1(%esi),%bl
+        movb 2(%esi),%dl
+        movb %al,(%edi)
+        movb %bl,1(%edi)
+        movb %dl,2(%edi)
+        addl $4,%esi
+        addl $3,%edi
+        decl %ecx
+        jnz .L3
+.L4:
+        jmp _MMXRETURN
+end;
+
+procedure ConvertMMXpII32_16RGB565(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+        // set up masks
+        movq mmx32_rgb565_b,%mm5
+        movq mmx32_rgb565_g,%mm6
+        movq mmx32_rgb565_r,%mm7
+
+        movl %ecx,%edx
+        shrl $2,%ecx
+        jnz .L1
+        jmp .L2 // not necessary at the moment, but doesnt hurt (much)
+
+.L1:
+        movq (%esi),%mm0        // argb
+        movq %mm0,%mm1          // argb
+        pand %mm6,%mm0          // 00g0
+        movq %mm1,%mm3          // argb
+        pand %mm5,%mm1          // 000b
+        pand %mm7,%mm3          // 0r00
+        pslld $2,%mm1           // 0 0 000000bb bbb00000
+        por %mm1,%mm0           // 0 0 ggggggbb bbb00000
+        psrld $5,%mm0           // 0 0 00000ggg gggbbbbb
+
+        movq 8(%esi),%mm4       // argb
+        movq %mm4,%mm2          // argb
+        pand %mm6,%mm4          // 00g0
+        movq %mm2,%mm1          // argb
+        pand %mm5,%mm2          // 000b
+        pand %mm7,%mm1          // 0r00
+        pslld $2,%mm2           // 0 0 000000bb bbb00000
+        por %mm2,%mm4           // 0 0 ggggggbb bbb00000
+        psrld $5,%mm4           // 0 0 00000ggg gggbbbbb
+
+        packuswb %mm1,%mm3      // R 0 r 0
+        packssdw %mm4,%mm0      // as above.. ish
+        por %mm3,%mm0           // done.
+        movq %mm0,(%edi)
+
+        addl $16,%esi
+        addl $8,%edi
+        decl %ecx
+        jnz .L1
+
+.L2:
+        movl %edx,%ecx
+        andl $3,%ecx
+        jz .L4
+.L3:
+        movb (%esi),%al
+        movb 1(%esi),%bh
+        movb 2(%esi),%ah
+        shrb $3,%al
+        andl $0x0F81F,%eax         // BYTE?
+        shrl $5,%ebx
+        andl $0x07E0,%ebx          // BYTE?
+        addl %ebx,%eax
+        movb %al,(%edi)
+        movb %ah,1(%edi)
+        addl $4,%esi
+        addl $2,%edi
+        decl %ecx
+        jnz .L3
+
+.L4:
+        jmp _MMXRETURN
+end;
+
+procedure ConvertMMXpII32_16BGR565(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+        movq mmx32_rgb565_r,%mm5
+        movq mmx32_rgb565_g,%mm6
+        movq mmx32_rgb565_b,%mm7
+
+        movl %ecx,%edx
+        shrl $2,%ecx
+        jnz .L1
+        jmp .L2
+
+.L1:
+        movq (%esi),%mm0                // a r g b
+        movq %mm0,%mm1                  // a r g b
+        pand %mm6,%mm0                  // 0 0 g 0
+        movq %mm1,%mm3                  // a r g b
+        pand %mm5,%mm1                  // 0 r 0 0
+        pand %mm7,%mm3                  // 0 0 0 b
+
+        psllq $16,%mm3                  // 0 b 0 0
+        psrld $14,%mm1                  // 0 0 000000rr rrr00000
+        por %mm1,%mm0                   // 0 0 ggggggrr rrr00000
+        psrld $5,%mm0                   // 0 0 00000ggg gggrrrrr
+
+        movq 8(%esi),%mm4               // a r g b
+        movq %mm4,%mm2                  // a r g b
+        pand %mm6,%mm4                  // 0 0 g 0
+        movq %mm2,%mm1                  // a r g b
+        pand %mm5,%mm2                  // 0 r 0 0
+        pand %mm7,%mm1                  // 0 0 0 b
+
+        psllq $16,%mm1                  // 0 b 0 0
+        psrld $14,%mm2                  // 0 0 000000rr rrr00000
+        por %mm2,%mm4                   // 0 0 ggggggrr rrr00000
+        psrld $5,%mm4                   // 0 0 00000ggg gggrrrrr
+
+        packuswb %mm1,%mm3              // BBBBB000 00000000 bbbbb000 00000000
+        packssdw %mm4,%mm0              // 00000GGG GGGRRRRR 00000GGG GGGRRRRR
+        por %mm3,%mm0                   // BBBBBGGG GGGRRRRR bbbbbggg gggrrrrr
+        movq %mm0,(%edi)
+
+        addl $16,%esi
+        addl $8,%edi
+        decl %ecx
+        jnz .L1
+
+.L2:
+        andl $3,%edx
+        jz .L4
+.L3:
+        movb 2(%esi),%al
+        movb 1(%esi),%bh
+        movb (%esi),%ah
+        shrb $3,%al
+        andl $0x0F81F,%eax                 // BYTE ?
+        shrl $5,%ebx
+        andl $0x07E0,%ebx                  // BYTE ?
+        addl %ebx,%eax
+        movb %al,(%edi)
+        movb %ah,1(%edi)
+        addl $4,%esi
+        addl $2,%edi
+        decl %edx
+        jnz .L3
+
+.L4:
+        jmp _MMXRETURN
+end;
+
+label
+  _convert_bgr555_cheat;
+
+procedure ConvertMMXpII32_16BGR555(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+        // the 16BGR555 converter is identical to the RGB555 one,
+        // except it uses a different multiplier for the pmaddwd
+        // instruction.  cool huh.
+
+        movq mmx32_bgr555_mul,%mm7
+        jmp _convert_bgr555_cheat
+end;
+
+// This is the same as the Intel version.. they obviously went to
+// much more trouble to expand/coil the loop than I did, so theirs
+// would almost certainly be faster, even if only a little.
+// I did rename 'mmx32_rgb555_add' to 'mmx32_rgb555_mul', which is
+// (I think) a more accurate name..
+procedure ConvertMMXpII32_16RGB555(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+        movq mmx32_rgb555_mul,%mm7
+_convert_bgr555_cheat:
+        movq mmx32_rgb555_g,%mm6
+
+        movl %ecx,%edx                     // Save ecx
+
+        andl $0x0fffffff8,%ecx             // clear lower three bits
+        jnz .L_OK
+        jmp .L2
+
+.L_OK:
+
+        movq 8(%esi),%mm2
+
+        movq (%esi),%mm0
+        movq %mm2,%mm3
+
+        pand mmx32_rgb555_rb,%mm3
+        movq %mm0,%mm1
+
+        pand mmx32_rgb555_rb,%mm1
+        pmaddwd %mm7,%mm3
+
+        pmaddwd %mm7,%mm1
+        pand %mm6,%mm2
+
+.L1:
+        movq 24(%esi),%mm4
+        pand %mm6,%mm0
+
+        movq 16(%esi),%mm5
+        por %mm2,%mm3
+
+        psrld $6,%mm3
+        por %mm0,%mm1
+
+        movq %mm4,%mm0
+        psrld $6,%mm1
+
+        pand mmx32_rgb555_rb,%mm0
+        packssdw %mm3,%mm1
+
+        movq %mm5,%mm3
+        pmaddwd %mm7,%mm0
+
+        pand mmx32_rgb555_rb,%mm3
+        pand %mm6,%mm4
+
+        movq %mm1,(%edi)
+        pmaddwd %mm7,%mm3
+
+        addl $32,%esi
+        por %mm0,%mm4
+
+        pand %mm6,%mm5
+        psrld $6,%mm4
+
+        movq 8(%esi),%mm2
+        por %mm3,%mm5
+
+        movq (%esi),%mm0
+        psrld $6,%mm5
+
+        movq %mm2,%mm3
+        movq %mm0,%mm1
+
+        pand mmx32_rgb555_rb,%mm3
+        packssdw %mm4,%mm5
+
+        pand mmx32_rgb555_rb,%mm1
+        pand %mm6,%mm2
+
+        movq %mm5,8(%edi)
+        pmaddwd %mm7,%mm3
+
+        pmaddwd %mm7,%mm1
+        addl $16,%edi
+
+        subl $8,%ecx
+        jz .L2
+        jmp .L1
+
+
+.L2:
+        movl %edx,%ecx
+
+        andl $7,%ecx
+        jz .L4
+
+.L3:
+        movl (%esi),%ebx
+        addl $4,%esi
+
+        movl %ebx,%eax
+        movl %ebx,%edx
+
+        shrl $3,%eax
+        shrl $6,%edx
+
+        andl $0b0000000000011111,%eax
+        andl $0b0000001111100000,%edx
+
+        shrl $9,%ebx
+
+        orl %edx,%eax
+
+        andl $0b0111110000000000,%ebx
+
+        orl %ebx,%eax
+
+        movw %ax,(%edi)
+        addl $2,%edi
+
+        decl %ecx
+        jnz .L3
+
+.L4:
+        jmp _MMXRETURN
+end;

+ 0 - 163
packages/hermes/src/i386/mmxp_32.as

@@ -1,163 +0,0 @@
-#
-# MMX format converters for HERMES
-# Copyright (c) 1998 Christian Nentwich ([email protected])
-# This source code is licensed under the GNU LGPL
-#
-# Please refer to the file COPYING.LIB contained in the distribution for
-# licensing conditions
-#
-# COPYRIGHT NOTICE
-#
-# This file partly contains code that is (c) Intel Corporation, specifically
-# the mode detection routine, and the converter to 15 bit (8 pixel
-# conversion routine from the mmx programming tutorial pages).
-#
-
-#BITS 32
-
-
-.globl _ConvertMMXp32_16RGB555
-
-.extern _mmxreturn
-
-.data
-
-.align 8
-
-mmx32_rgb555_rb: .long 0x00f800f8,0x00f800f8 # Constants for conversion routines
-mmx32_rgb555_add: .long 0x20000008,0x20000008
-mmx32_rgb555_g: .long 0x0000f800,0x0000f800
-
-
-
-.text
-
-
-
-## Gone for now, it didnt draw correctly AND was slower than the x86 routine
-_ConvertMMXp32_16RGB565:
-
-        jmp _mmxreturn
-
-
-
-
-_ConvertMMXp32_16RGB555:
-
-        movq mmx32_rgb555_add,%mm7
-        movq mmx32_rgb555_g,%mm6
-
-        movl %ecx,%edx                     # Save ecx
-
-        andl $0x0fffffff8,%ecx             # clear lower three bits
-        jnz _ConvertMMXp32_16RGB555.L_OK
-        jmp _ConvertMMXp32_16RGB555.L2
-
-_ConvertMMXp32_16RGB555.L_OK:
-
-        movq 8(%esi),%mm2
-
-        movq (%esi),%mm0
-        movq %mm2,%mm3
-
-        pand mmx32_rgb555_rb,%mm3
-        movq %mm0,%mm1
-
-        pand mmx32_rgb555_rb,%mm1
-        pmaddwd %mm7,%mm3
-
-        pmaddwd %mm7,%mm1
-        pand %mm6,%mm2
-
-_ConvertMMXp32_16RGB555.L1:
-        movq 24(%esi),%mm4
-        pand %mm6,%mm0
-
-        movq 16(%esi),%mm5
-        por %mm2,%mm3
-
-        psrld $6,%mm3
-        por %mm0,%mm1
-
-        movq %mm4,%mm0
-        psrld $6,%mm1
-
-        pand mmx32_rgb555_rb,%mm0
-        packssdw %mm3,%mm1
-
-        movq %mm5,%mm3
-        pmaddwd %mm7,%mm0
-
-        pand mmx32_rgb555_rb,%mm3
-        pand %mm6,%mm4
-
-        movq %mm1,(%edi)
-        pmaddwd %mm7,%mm3
-
-        addl $32,%esi
-        por %mm0,%mm4
-
-        pand %mm6,%mm5
-        psrld $6,%mm4
-
-        movq 8(%esi),%mm2
-        por %mm3,%mm5
-
-        movq (%esi),%mm0
-        psrld $6,%mm5
-
-        movq %mm2,%mm3
-        movq %mm0,%mm1
-
-        pand mmx32_rgb555_rb,%mm3
-        packssdw %mm4,%mm5
-
-        pand mmx32_rgb555_rb,%mm1
-        pand %mm6,%mm2
-
-        movq %mm5,8(%edi)
-        pmaddwd %mm7,%mm3
-
-        pmaddwd %mm7,%mm1
-        addl $16,%edi
-
-        subl $8,%ecx
-        jz _ConvertMMXp32_16RGB555.L2
-        jmp _ConvertMMXp32_16RGB555.L1
-
-
-_ConvertMMXp32_16RGB555.L2:
-        movl %edx,%ecx
-
-        andl $7,%ecx
-        jz _ConvertMMXp32_16RGB555.L4
-
-_ConvertMMXp32_16RGB555.L3:
-        movl (%esi),%ebx
-        addl $4,%esi
-
-        movl %ebx,%eax
-        movl %ebx,%edx
-
-        shrl $3,%eax
-        shrl $6,%edx
-
-        andl $0b0000000000011111,%eax
-        andl $0b0000001111100000,%edx
-
-        shrl $9,%ebx
-
-        orl %edx,%eax
-
-        andl $0b0111110000000000,%ebx
-
-        orl %ebx,%eax
-
-        movw %ax,(%edi)
-        addl $2,%edi
-
-        decl %ecx
-        jnz _ConvertMMXp32_16RGB555.L3
-
-_ConvertMMXp32_16RGB555.L4:
-        jmp _mmxreturn

+ 168 - 0
packages/hermes/src/i386/mmxp_32.inc

@@ -0,0 +1,168 @@
+{
+    MMX format converters for HERMES
+    Copyright (c) 1998 Christian Nentwich ([email protected])
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version
+    with the following modification:
+
+    As a special exception, the copyright holders of this library give you
+    permission to link this library with independent modules to produce an
+    executable, regardless of the license terms of these independent modules, and
+    to copy and distribute the resulting executable under terms of your choice,
+    provided that you also meet, for each linked independent module, the terms
+    and conditions of the license of that module. An independent module is a
+    module which is not derived from or based on this library. If you modify
+    this library, you may extend this exception to your version of the library,
+    but you are not obligated to do so. If you do not wish to do so, delete this
+    exception statement from your version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+    COPYRIGHT NOTICE
+
+    This file partly contains code that is (c) Intel Corporation, specifically
+    the mode detection routine, and the converter to 15 bit (8 pixel
+    conversion routine from the mmx programming tutorial pages).
+}
+
+// Constants for conversion routines
+const
+  //mmx32_rgb555_rb: QWord = $00f800f800f800f8;
+  mmx32_rgb555_add: QWord = $2000000820000008;
+  //mmx32_rgb555_g: QWord = $0000f8000000f800;
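+  // mmx32_rgb555_rb and mmx32_rgb555_g are left commented out above,
+  // presumably because another include of the same unit already defines
+  // them (mmxp2_32.inc uses the same constants).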
+
+{ Gone for now: it didn't draw correctly AND was slower than the x86 routine
+_ConvertMMXp32_16RGB565:
+
+        jmp _MMXRETURN}
+
+procedure ConvertMMXp32_16RGB555(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+        movq mmx32_rgb555_add,%mm7
+        movq mmx32_rgb555_g,%mm6
+
+        movl %ecx,%edx                     // Save ecx
+
+        andl $0x0fffffff8,%ecx             // clear lower three bits
+        jnz .L_OK
+        jmp .L2
+
+.L_OK:
+
+        movq 8(%esi),%mm2
+
+        movq (%esi),%mm0
+        movq %mm2,%mm3
+
+        pand mmx32_rgb555_rb,%mm3
+        movq %mm0,%mm1
+
+        pand mmx32_rgb555_rb,%mm1
+        pmaddwd %mm7,%mm3
+
+        pmaddwd %mm7,%mm1
+        pand %mm6,%mm2
+
+.L1:
+        movq 24(%esi),%mm4
+        pand %mm6,%mm0
+
+        movq 16(%esi),%mm5
+        por %mm2,%mm3
+
+        psrld $6,%mm3
+        por %mm0,%mm1
+
+        movq %mm4,%mm0
+        psrld $6,%mm1
+
+        pand mmx32_rgb555_rb,%mm0
+        packssdw %mm3,%mm1
+
+        movq %mm5,%mm3
+        pmaddwd %mm7,%mm0
+
+        pand mmx32_rgb555_rb,%mm3
+        pand %mm6,%mm4
+
+        movq %mm1,(%edi)
+        pmaddwd %mm7,%mm3
+
+        addl $32,%esi
+        por %mm0,%mm4
+
+        pand %mm6,%mm5
+        psrld $6,%mm4
+
+        movq 8(%esi),%mm2
+        por %mm3,%mm5
+
+        movq (%esi),%mm0
+        psrld $6,%mm5
+
+        movq %mm2,%mm3
+        movq %mm0,%mm1
+
+        pand mmx32_rgb555_rb,%mm3
+        packssdw %mm4,%mm5
+
+        pand mmx32_rgb555_rb,%mm1
+        pand %mm6,%mm2
+
+        movq %mm5,8(%edi)
+        pmaddwd %mm7,%mm3
+
+        pmaddwd %mm7,%mm1
+        addl $16,%edi
+
+        subl $8,%ecx
+        jz .L2
+        jmp .L1
+
+
+.L2:
+        movl %edx,%ecx
+
+        andl $7,%ecx
+        jz .L4
+
+.L3:
+        movl (%esi),%ebx
+        addl $4,%esi
+
+        movl %ebx,%eax
+        movl %ebx,%edx
+
+        shrl $3,%eax
+        shrl $6,%edx
+
+        andl $0b0000000000011111,%eax
+        andl $0b0000001111100000,%edx
+
+        shrl $9,%ebx
+
+        orl %edx,%eax
+
+        andl $0b0111110000000000,%ebx
+
+        orl %ebx,%eax
+
+        movw %ax,(%edi)
+        addl $2,%edi
+
+        decl %ecx
+        jnz .L3
+
+.L4:
+        jmp _MMXRETURN
+end;
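
The x8616lut.as file removed below is pure data: for every possible byte b of a 16-bit 565 pixel (RRRRRGGG GGGBBBBB) each table stores a pair of dwords, one contribution per source byte, with the 5- and 6-bit channel fragments rescaled to 8 bits as x*255 div 31 (div 63 for green). The first entries bear this out: $00008 for blue=1, $00020 for blue=4, $06100 for a green-high fragment of 24. A hedged sketch of how one pair of _ConvertX86p16_32RGB888_LUT_X86 entries could be regenerated (helper name and out-parameter style are illustrative; the shipped tables are precomputed):

    // Hypothetical regeneration of the entry pair for byte value b.
    procedure MakeRGB888LutPair(b: Byte; out lo, hi: LongWord);
    begin
      // the pixel's low byte carries blue (bits 0..4) and green low bits (5..7)
      lo := LongWord(((b and $1F) * 255) div 31)                  // blue
            or (LongWord(((b shr 5) * 255) div 63) shl 8);        // green, low part
      // the pixel's high byte carries green high bits (0..2) and red (3..7)
      hi := (LongWord((((b and $07) shl 3) * 255) div 63) shl 8)  // green, high part
            or (LongWord(((b shr 3) * 255) div 31) shl 16);       // red
    end;

A 565 pixel p would then presumably expand as table[2*Lo(p)] or table[2*Hi(p)+1]; the x86 routine that indexes these tables is not part of this hunk.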

+ 0 - 2059
packages/hermes/src/i386/x8616lut.as

@@ -1,2059 +0,0 @@
-
-.data
-
-_ConvertX86p16_32RGB888_LUT_X86:
-                .long 0x00000
-                .long 0x00000
-                .long 0x00008
-                .long 0x02000
-                .long 0x00010
-                .long 0x04000
-                .long 0x00018
-                .long 0x06100
-                .long 0x00020
-                .long 0x08100
-                .long 0x00029
-                .long 0x0a100
-                .long 0x00031
-                .long 0x0c200
-                .long 0x00039
-                .long 0x0e200
-                .long 0x00041
-                .long 0x80000
-                .long 0x0004a
-                .long 0x82000
-                .long 0x00052
-                .long 0x84000
-                .long 0x0005a
-                .long 0x86100
-                .long 0x00062
-                .long 0x88100
-                .long 0x0006a
-                .long 0x8a100
-                .long 0x00073
-                .long 0x8c200
-                .long 0x0007b
-                .long 0x8e200
-                .long 0x00083
-                .long 0x100000
-                .long 0x0008b
-                .long 0x102000
-                .long 0x00094
-                .long 0x104000
-                .long 0x0009c
-                .long 0x106100
-                .long 0x000a4
-                .long 0x108100
-                .long 0x000ac
-                .long 0x10a100
-                .long 0x000b4
-                .long 0x10c200
-                .long 0x000bd
-                .long 0x10e200
-                .long 0x000c5
-                .long 0x180000
-                .long 0x000cd
-                .long 0x182000
-                .long 0x000d5
-                .long 0x184000
-                .long 0x000de
-                .long 0x186100
-                .long 0x000e6
-                .long 0x188100
-                .long 0x000ee
-                .long 0x18a100
-                .long 0x000f6
-                .long 0x18c200
-                .long 0x000ff
-                .long 0x18e200
-                .long 0x00400
-                .long 0x200000
-                .long 0x00408
-                .long 0x202000
-                .long 0x00410
-                .long 0x204000
-                .long 0x00418
-                .long 0x206100
-                .long 0x00420
-                .long 0x208100
-                .long 0x00429
-                .long 0x20a100
-                .long 0x00431
-                .long 0x20c200
-                .long 0x00439
-                .long 0x20e200
-                .long 0x00441
-                .long 0x290000
-                .long 0x0044a
-                .long 0x292000
-                .long 0x00452
-                .long 0x294000
-                .long 0x0045a
-                .long 0x296100
-                .long 0x00462
-                .long 0x298100
-                .long 0x0046a
-                .long 0x29a100
-                .long 0x00473
-                .long 0x29c200
-                .long 0x0047b
-                .long 0x29e200
-                .long 0x00483
-                .long 0x310000
-                .long 0x0048b
-                .long 0x312000
-                .long 0x00494
-                .long 0x314000
-                .long 0x0049c
-                .long 0x316100
-                .long 0x004a4
-                .long 0x318100
-                .long 0x004ac
-                .long 0x31a100
-                .long 0x004b4
-                .long 0x31c200
-                .long 0x004bd
-                .long 0x31e200
-                .long 0x004c5
-                .long 0x390000
-                .long 0x004cd
-                .long 0x392000
-                .long 0x004d5
-                .long 0x394000
-                .long 0x004de
-                .long 0x396100
-                .long 0x004e6
-                .long 0x398100
-                .long 0x004ee
-                .long 0x39a100
-                .long 0x004f6
-                .long 0x39c200
-                .long 0x004ff
-                .long 0x39e200
-                .long 0x00800
-                .long 0x410000
-                .long 0x00808
-                .long 0x412000
-                .long 0x00810
-                .long 0x414000
-                .long 0x00818
-                .long 0x416100
-                .long 0x00820
-                .long 0x418100
-                .long 0x00829
-                .long 0x41a100
-                .long 0x00831
-                .long 0x41c200
-                .long 0x00839
-                .long 0x41e200
-                .long 0x00841
-                .long 0x4a0000
-                .long 0x0084a
-                .long 0x4a2000
-                .long 0x00852
-                .long 0x4a4000
-                .long 0x0085a
-                .long 0x4a6100
-                .long 0x00862
-                .long 0x4a8100
-                .long 0x0086a
-                .long 0x4aa100
-                .long 0x00873
-                .long 0x4ac200
-                .long 0x0087b
-                .long 0x4ae200
-                .long 0x00883
-                .long 0x520000
-                .long 0x0088b
-                .long 0x522000
-                .long 0x00894
-                .long 0x524000
-                .long 0x0089c
-                .long 0x526100
-                .long 0x008a4
-                .long 0x528100
-                .long 0x008ac
-                .long 0x52a100
-                .long 0x008b4
-                .long 0x52c200
-                .long 0x008bd
-                .long 0x52e200
-                .long 0x008c5
-                .long 0x5a0000
-                .long 0x008cd
-                .long 0x5a2000
-                .long 0x008d5
-                .long 0x5a4000
-                .long 0x008de
-                .long 0x5a6100
-                .long 0x008e6
-                .long 0x5a8100
-                .long 0x008ee
-                .long 0x5aa100
-                .long 0x008f6
-                .long 0x5ac200
-                .long 0x008ff
-                .long 0x5ae200
-                .long 0x00c00
-                .long 0x620000
-                .long 0x00c08
-                .long 0x622000
-                .long 0x00c10
-                .long 0x624000
-                .long 0x00c18
-                .long 0x626100
-                .long 0x00c20
-                .long 0x628100
-                .long 0x00c29
-                .long 0x62a100
-                .long 0x00c31
-                .long 0x62c200
-                .long 0x00c39
-                .long 0x62e200
-                .long 0x00c41
-                .long 0x6a0000
-                .long 0x00c4a
-                .long 0x6a2000
-                .long 0x00c52
-                .long 0x6a4000
-                .long 0x00c5a
-                .long 0x6a6100
-                .long 0x00c62
-                .long 0x6a8100
-                .long 0x00c6a
-                .long 0x6aa100
-                .long 0x00c73
-                .long 0x6ac200
-                .long 0x00c7b
-                .long 0x6ae200
-                .long 0x00c83
-                .long 0x730000
-                .long 0x00c8b
-                .long 0x732000
-                .long 0x00c94
-                .long 0x734000
-                .long 0x00c9c
-                .long 0x736100
-                .long 0x00ca4
-                .long 0x738100
-                .long 0x00cac
-                .long 0x73a100
-                .long 0x00cb4
-                .long 0x73c200
-                .long 0x00cbd
-                .long 0x73e200
-                .long 0x00cc5
-                .long 0x7b0000
-                .long 0x00ccd
-                .long 0x7b2000
-                .long 0x00cd5
-                .long 0x7b4000
-                .long 0x00cde
-                .long 0x7b6100
-                .long 0x00ce6
-                .long 0x7b8100
-                .long 0x00cee
-                .long 0x7ba100
-                .long 0x00cf6
-                .long 0x7bc200
-                .long 0x00cff
-                .long 0x7be200
-                .long 0x01000
-                .long 0x830000
-                .long 0x01008
-                .long 0x832000
-                .long 0x01010
-                .long 0x834000
-                .long 0x01018
-                .long 0x836100
-                .long 0x01020
-                .long 0x838100
-                .long 0x01029
-                .long 0x83a100
-                .long 0x01031
-                .long 0x83c200
-                .long 0x01039
-                .long 0x83e200
-                .long 0x01041
-                .long 0x8b0000
-                .long 0x0104a
-                .long 0x8b2000
-                .long 0x01052
-                .long 0x8b4000
-                .long 0x0105a
-                .long 0x8b6100
-                .long 0x01062
-                .long 0x8b8100
-                .long 0x0106a
-                .long 0x8ba100
-                .long 0x01073
-                .long 0x8bc200
-                .long 0x0107b
-                .long 0x8be200
-                .long 0x01083
-                .long 0x940000
-                .long 0x0108b
-                .long 0x942000
-                .long 0x01094
-                .long 0x944000
-                .long 0x0109c
-                .long 0x946100
-                .long 0x010a4
-                .long 0x948100
-                .long 0x010ac
-                .long 0x94a100
-                .long 0x010b4
-                .long 0x94c200
-                .long 0x010bd
-                .long 0x94e200
-                .long 0x010c5
-                .long 0x9c0000
-                .long 0x010cd
-                .long 0x9c2000
-                .long 0x010d5
-                .long 0x9c4000
-                .long 0x010de
-                .long 0x9c6100
-                .long 0x010e6
-                .long 0x9c8100
-                .long 0x010ee
-                .long 0x9ca100
-                .long 0x010f6
-                .long 0x9cc200
-                .long 0x010ff
-                .long 0x9ce200
-                .long 0x01400
-                .long 0xa40000
-                .long 0x01408
-                .long 0xa42000
-                .long 0x01410
-                .long 0xa44000
-                .long 0x01418
-                .long 0xa46100
-                .long 0x01420
-                .long 0xa48100
-                .long 0x01429
-                .long 0xa4a100
-                .long 0x01431
-                .long 0xa4c200
-                .long 0x01439
-                .long 0xa4e200
-                .long 0x01441
-                .long 0xac0000
-                .long 0x0144a
-                .long 0xac2000
-                .long 0x01452
-                .long 0xac4000
-                .long 0x0145a
-                .long 0xac6100
-                .long 0x01462
-                .long 0xac8100
-                .long 0x0146a
-                .long 0xaca100
-                .long 0x01473
-                .long 0xacc200
-                .long 0x0147b
-                .long 0xace200
-                .long 0x01483
-                .long 0xb40000
-                .long 0x0148b
-                .long 0xb42000
-                .long 0x01494
-                .long 0xb44000
-                .long 0x0149c
-                .long 0xb46100
-                .long 0x014a4
-                .long 0xb48100
-                .long 0x014ac
-                .long 0xb4a100
-                .long 0x014b4
-                .long 0xb4c200
-                .long 0x014bd
-                .long 0xb4e200
-                .long 0x014c5
-                .long 0xbd0000
-                .long 0x014cd
-                .long 0xbd2000
-                .long 0x014d5
-                .long 0xbd4000
-                .long 0x014de
-                .long 0xbd6100
-                .long 0x014e6
-                .long 0xbd8100
-                .long 0x014ee
-                .long 0xbda100
-                .long 0x014f6
-                .long 0xbdc200
-                .long 0x014ff
-                .long 0xbde200
-                .long 0x01800
-                .long 0xc50000
-                .long 0x01808
-                .long 0xc52000
-                .long 0x01810
-                .long 0xc54000
-                .long 0x01818
-                .long 0xc56100
-                .long 0x01820
-                .long 0xc58100
-                .long 0x01829
-                .long 0xc5a100
-                .long 0x01831
-                .long 0xc5c200
-                .long 0x01839
-                .long 0xc5e200
-                .long 0x01841
-                .long 0xcd0000
-                .long 0x0184a
-                .long 0xcd2000
-                .long 0x01852
-                .long 0xcd4000
-                .long 0x0185a
-                .long 0xcd6100
-                .long 0x01862
-                .long 0xcd8100
-                .long 0x0186a
-                .long 0xcda100
-                .long 0x01873
-                .long 0xcdc200
-                .long 0x0187b
-                .long 0xcde200
-                .long 0x01883
-                .long 0xd50000
-                .long 0x0188b
-                .long 0xd52000
-                .long 0x01894
-                .long 0xd54000
-                .long 0x0189c
-                .long 0xd56100
-                .long 0x018a4
-                .long 0xd58100
-                .long 0x018ac
-                .long 0xd5a100
-                .long 0x018b4
-                .long 0xd5c200
-                .long 0x018bd
-                .long 0xd5e200
-                .long 0x018c5
-                .long 0xde0000
-                .long 0x018cd
-                .long 0xde2000
-                .long 0x018d5
-                .long 0xde4000
-                .long 0x018de
-                .long 0xde6100
-                .long 0x018e6
-                .long 0xde8100
-                .long 0x018ee
-                .long 0xdea100
-                .long 0x018f6
-                .long 0xdec200
-                .long 0x018ff
-                .long 0xdee200
-                .long 0x01c00
-                .long 0xe60000
-                .long 0x01c08
-                .long 0xe62000
-                .long 0x01c10
-                .long 0xe64000
-                .long 0x01c18
-                .long 0xe66100
-                .long 0x01c20
-                .long 0xe68100
-                .long 0x01c29
-                .long 0xe6a100
-                .long 0x01c31
-                .long 0xe6c200
-                .long 0x01c39
-                .long 0xe6e200
-                .long 0x01c41
-                .long 0xee0000
-                .long 0x01c4a
-                .long 0xee2000
-                .long 0x01c52
-                .long 0xee4000
-                .long 0x01c5a
-                .long 0xee6100
-                .long 0x01c62
-                .long 0xee8100
-                .long 0x01c6a
-                .long 0xeea100
-                .long 0x01c73
-                .long 0xeec200
-                .long 0x01c7b
-                .long 0xeee200
-                .long 0x01c83
-                .long 0xf60000
-                .long 0x01c8b
-                .long 0xf62000
-                .long 0x01c94
-                .long 0xf64000
-                .long 0x01c9c
-                .long 0xf66100
-                .long 0x01ca4
-                .long 0xf68100
-                .long 0x01cac
-                .long 0xf6a100
-                .long 0x01cb4
-                .long 0xf6c200
-                .long 0x01cbd
-                .long 0xf6e200
-                .long 0x01cc5
-                .long 0xff0000
-                .long 0x01ccd
-                .long 0xff2000
-                .long 0x01cd5
-                .long 0xff4000
-                .long 0x01cde
-                .long 0xff6100
-                .long 0x01ce6
-                .long 0xff8100
-                .long 0x01cee
-                .long 0xffa100
-                .long 0x01cf6
-                .long 0xffc200
-                .long 0x01cff
-                .long 0xffe200
-
-_ConvertX86p16_32BGR888_LUT_X86:
-                .long 0x00000
-                .long 0x00000
-                .long 0x80000
-                .long 0x02000
-                .long 0x100000
-                .long 0x04000
-                .long 0x180000
-                .long 0x06100
-                .long 0x200000
-                .long 0x08100
-                .long 0x290000
-                .long 0x0a100
-                .long 0x310000
-                .long 0x0c200
-                .long 0x390000
-                .long 0x0e200
-                .long 0x410000
-                .long 0x00008
-                .long 0x4a0000
-                .long 0x02008
-                .long 0x520000
-                .long 0x04008
-                .long 0x5a0000
-                .long 0x06108
-                .long 0x620000
-                .long 0x08108
-                .long 0x6a0000
-                .long 0x0a108
-                .long 0x730000
-                .long 0x0c208
-                .long 0x7b0000
-                .long 0x0e208
-                .long 0x830000
-                .long 0x00010
-                .long 0x8b0000
-                .long 0x02010
-                .long 0x940000
-                .long 0x04010
-                .long 0x9c0000
-                .long 0x06110
-                .long 0xa40000
-                .long 0x08110
-                .long 0xac0000
-                .long 0x0a110
-                .long 0xb40000
-                .long 0x0c210
-                .long 0xbd0000
-                .long 0x0e210
-                .long 0xc50000
-                .long 0x00018
-                .long 0xcd0000
-                .long 0x02018
-                .long 0xd50000
-                .long 0x04018
-                .long 0xde0000
-                .long 0x06118
-                .long 0xe60000
-                .long 0x08118
-                .long 0xee0000
-                .long 0x0a118
-                .long 0xf60000
-                .long 0x0c218
-                .long 0xff0000
-                .long 0x0e218
-                .long 0x00400
-                .long 0x00020
-                .long 0x80400
-                .long 0x02020
-                .long 0x100400
-                .long 0x04020
-                .long 0x180400
-                .long 0x06120
-                .long 0x200400
-                .long 0x08120
-                .long 0x290400
-                .long 0x0a120
-                .long 0x310400
-                .long 0x0c220
-                .long 0x390400
-                .long 0x0e220
-                .long 0x410400
-                .long 0x00029
-                .long 0x4a0400
-                .long 0x02029
-                .long 0x520400
-                .long 0x04029
-                .long 0x5a0400
-                .long 0x06129
-                .long 0x620400
-                .long 0x08129
-                .long 0x6a0400
-                .long 0x0a129
-                .long 0x730400
-                .long 0x0c229
-                .long 0x7b0400
-                .long 0x0e229
-                .long 0x830400
-                .long 0x00031
-                .long 0x8b0400
-                .long 0x02031
-                .long 0x940400
-                .long 0x04031
-                .long 0x9c0400
-                .long 0x06131
-                .long 0xa40400
-                .long 0x08131
-                .long 0xac0400
-                .long 0x0a131
-                .long 0xb40400
-                .long 0x0c231
-                .long 0xbd0400
-                .long 0x0e231
-                .long 0xc50400
-                .long 0x00039
-                .long 0xcd0400
-                .long 0x02039
-                .long 0xd50400
-                .long 0x04039
-                .long 0xde0400
-                .long 0x06139
-                .long 0xe60400
-                .long 0x08139
-                .long 0xee0400
-                .long 0x0a139
-                .long 0xf60400
-                .long 0x0c239
-                .long 0xff0400
-                .long 0x0e239
-                .long 0x00800
-                .long 0x00041
-                .long 0x80800
-                .long 0x02041
-                .long 0x100800
-                .long 0x04041
-                .long 0x180800
-                .long 0x06141
-                .long 0x200800
-                .long 0x08141
-                .long 0x290800
-                .long 0x0a141
-                .long 0x310800
-                .long 0x0c241
-                .long 0x390800
-                .long 0x0e241
-                .long 0x410800
-                .long 0x0004a
-                .long 0x4a0800
-                .long 0x0204a
-                .long 0x520800
-                .long 0x0404a
-                .long 0x5a0800
-                .long 0x0614a
-                .long 0x620800
-                .long 0x0814a
-                .long 0x6a0800
-                .long 0x0a14a
-                .long 0x730800
-                .long 0x0c24a
-                .long 0x7b0800
-                .long 0x0e24a
-                .long 0x830800
-                .long 0x00052
-                .long 0x8b0800
-                .long 0x02052
-                .long 0x940800
-                .long 0x04052
-                .long 0x9c0800
-                .long 0x06152
-                .long 0xa40800
-                .long 0x08152
-                .long 0xac0800
-                .long 0x0a152
-                .long 0xb40800
-                .long 0x0c252
-                .long 0xbd0800
-                .long 0x0e252
-                .long 0xc50800
-                .long 0x0005a
-                .long 0xcd0800
-                .long 0x0205a
-                .long 0xd50800
-                .long 0x0405a
-                .long 0xde0800
-                .long 0x0615a
-                .long 0xe60800
-                .long 0x0815a
-                .long 0xee0800
-                .long 0x0a15a
-                .long 0xf60800
-                .long 0x0c25a
-                .long 0xff0800
-                .long 0x0e25a
-                .long 0x00c00
-                .long 0x00062
-                .long 0x80c00
-                .long 0x02062
-                .long 0x100c00
-                .long 0x04062
-                .long 0x180c00
-                .long 0x06162
-                .long 0x200c00
-                .long 0x08162
-                .long 0x290c00
-                .long 0x0a162
-                .long 0x310c00
-                .long 0x0c262
-                .long 0x390c00
-                .long 0x0e262
-                .long 0x410c00
-                .long 0x0006a
-                .long 0x4a0c00
-                .long 0x0206a
-                .long 0x520c00
-                .long 0x0406a
-                .long 0x5a0c00
-                .long 0x0616a
-                .long 0x620c00
-                .long 0x0816a
-                .long 0x6a0c00
-                .long 0x0a16a
-                .long 0x730c00
-                .long 0x0c26a
-                .long 0x7b0c00
-                .long 0x0e26a
-                .long 0x830c00
-                .long 0x00073
-                .long 0x8b0c00
-                .long 0x02073
-                .long 0x940c00
-                .long 0x04073
-                .long 0x9c0c00
-                .long 0x06173
-                .long 0xa40c00
-                .long 0x08173
-                .long 0xac0c00
-                .long 0x0a173
-                .long 0xb40c00
-                .long 0x0c273
-                .long 0xbd0c00
-                .long 0x0e273
-                .long 0xc50c00
-                .long 0x0007b
-                .long 0xcd0c00
-                .long 0x0207b
-                .long 0xd50c00
-                .long 0x0407b
-                .long 0xde0c00
-                .long 0x0617b
-                .long 0xe60c00
-                .long 0x0817b
-                .long 0xee0c00
-                .long 0x0a17b
-                .long 0xf60c00
-                .long 0x0c27b
-                .long 0xff0c00
-                .long 0x0e27b
-                .long 0x01000
-                .long 0x00083
-                .long 0x81000
-                .long 0x02083
-                .long 0x101000
-                .long 0x04083
-                .long 0x181000
-                .long 0x06183
-                .long 0x201000
-                .long 0x08183
-                .long 0x291000
-                .long 0x0a183
-                .long 0x311000
-                .long 0x0c283
-                .long 0x391000
-                .long 0x0e283
-                .long 0x411000
-                .long 0x0008b
-                .long 0x4a1000
-                .long 0x0208b
-                .long 0x521000
-                .long 0x0408b
-                .long 0x5a1000
-                .long 0x0618b
-                .long 0x621000
-                .long 0x0818b
-                .long 0x6a1000
-                .long 0x0a18b
-                .long 0x731000
-                .long 0x0c28b
-                .long 0x7b1000
-                .long 0x0e28b
-                .long 0x831000
-                .long 0x00094
-                .long 0x8b1000
-                .long 0x02094
-                .long 0x941000
-                .long 0x04094
-                .long 0x9c1000
-                .long 0x06194
-                .long 0xa41000
-                .long 0x08194
-                .long 0xac1000
-                .long 0x0a194
-                .long 0xb41000
-                .long 0x0c294
-                .long 0xbd1000
-                .long 0x0e294
-                .long 0xc51000
-                .long 0x0009c
-                .long 0xcd1000
-                .long 0x0209c
-                .long 0xd51000
-                .long 0x0409c
-                .long 0xde1000
-                .long 0x0619c
-                .long 0xe61000
-                .long 0x0819c
-                .long 0xee1000
-                .long 0x0a19c
-                .long 0xf61000
-                .long 0x0c29c
-                .long 0xff1000
-                .long 0x0e29c
-                .long 0x01400
-                .long 0x000a4
-                .long 0x81400
-                .long 0x020a4
-                .long 0x101400
-                .long 0x040a4
-                .long 0x181400
-                .long 0x061a4
-                .long 0x201400
-                .long 0x081a4
-                .long 0x291400
-                .long 0x0a1a4
-                .long 0x311400
-                .long 0x0c2a4
-                .long 0x391400
-                .long 0x0e2a4
-                .long 0x411400
-                .long 0x000ac
-                .long 0x4a1400
-                .long 0x020ac
-                .long 0x521400
-                .long 0x040ac
-                .long 0x5a1400
-                .long 0x061ac
-                .long 0x621400
-                .long 0x081ac
-                .long 0x6a1400
-                .long 0x0a1ac
-                .long 0x731400
-                .long 0x0c2ac
-                .long 0x7b1400
-                .long 0x0e2ac
-                .long 0x831400
-                .long 0x000b4
-                .long 0x8b1400
-                .long 0x020b4
-                .long 0x941400
-                .long 0x040b4
-                .long 0x9c1400
-                .long 0x061b4
-                .long 0xa41400
-                .long 0x081b4
-                .long 0xac1400
-                .long 0x0a1b4
-                .long 0xb41400
-                .long 0x0c2b4
-                .long 0xbd1400
-                .long 0x0e2b4
-                .long 0xc51400
-                .long 0x000bd
-                .long 0xcd1400
-                .long 0x020bd
-                .long 0xd51400
-                .long 0x040bd
-                .long 0xde1400
-                .long 0x061bd
-                .long 0xe61400
-                .long 0x081bd
-                .long 0xee1400
-                .long 0x0a1bd
-                .long 0xf61400
-                .long 0x0c2bd
-                .long 0xff1400
-                .long 0x0e2bd
-                .long 0x01800
-                .long 0x000c5
-                .long 0x81800
-                .long 0x020c5
-                .long 0x101800
-                .long 0x040c5
-                .long 0x181800
-                .long 0x061c5
-                .long 0x201800
-                .long 0x081c5
-                .long 0x291800
-                .long 0x0a1c5
-                .long 0x311800
-                .long 0x0c2c5
-                .long 0x391800
-                .long 0x0e2c5
-                .long 0x411800
-                .long 0x000cd
-                .long 0x4a1800
-                .long 0x020cd
-                .long 0x521800
-                .long 0x040cd
-                .long 0x5a1800
-                .long 0x061cd
-                .long 0x621800
-                .long 0x081cd
-                .long 0x6a1800
-                .long 0x0a1cd
-                .long 0x731800
-                .long 0x0c2cd
-                .long 0x7b1800
-                .long 0x0e2cd
-                .long 0x831800
-                .long 0x000d5
-                .long 0x8b1800
-                .long 0x020d5
-                .long 0x941800
-                .long 0x040d5
-                .long 0x9c1800
-                .long 0x061d5
-                .long 0xa41800
-                .long 0x081d5
-                .long 0xac1800
-                .long 0x0a1d5
-                .long 0xb41800
-                .long 0x0c2d5
-                .long 0xbd1800
-                .long 0x0e2d5
-                .long 0xc51800
-                .long 0x000de
-                .long 0xcd1800
-                .long 0x020de
-                .long 0xd51800
-                .long 0x040de
-                .long 0xde1800
-                .long 0x061de
-                .long 0xe61800
-                .long 0x081de
-                .long 0xee1800
-                .long 0x0a1de
-                .long 0xf61800
-                .long 0x0c2de
-                .long 0xff1800
-                .long 0x0e2de
-                .long 0x01c00
-                .long 0x000e6
-                .long 0x81c00
-                .long 0x020e6
-                .long 0x101c00
-                .long 0x040e6
-                .long 0x181c00
-                .long 0x061e6
-                .long 0x201c00
-                .long 0x081e6
-                .long 0x291c00
-                .long 0x0a1e6
-                .long 0x311c00
-                .long 0x0c2e6
-                .long 0x391c00
-                .long 0x0e2e6
-                .long 0x411c00
-                .long 0x000ee
-                .long 0x4a1c00
-                .long 0x020ee
-                .long 0x521c00
-                .long 0x040ee
-                .long 0x5a1c00
-                .long 0x061ee
-                .long 0x621c00
-                .long 0x081ee
-                .long 0x6a1c00
-                .long 0x0a1ee
-                .long 0x731c00
-                .long 0x0c2ee
-                .long 0x7b1c00
-                .long 0x0e2ee
-                .long 0x831c00
-                .long 0x000f6
-                .long 0x8b1c00
-                .long 0x020f6
-                .long 0x941c00
-                .long 0x040f6
-                .long 0x9c1c00
-                .long 0x061f6
-                .long 0xa41c00
-                .long 0x081f6
-                .long 0xac1c00
-                .long 0x0a1f6
-                .long 0xb41c00
-                .long 0x0c2f6
-                .long 0xbd1c00
-                .long 0x0e2f6
-                .long 0xc51c00
-                .long 0x000ff
-                .long 0xcd1c00
-                .long 0x020ff
-                .long 0xd51c00
-                .long 0x040ff
-                .long 0xde1c00
-                .long 0x061ff
-                .long 0xe61c00
-                .long 0x081ff
-                .long 0xee1c00
-                .long 0x0a1ff
-                .long 0xf61c00
-                .long 0x0c2ff
-                .long 0xff1c00
-                .long 0x0e2ff
-
-_ConvertX86p16_32RGBA888_LUT_X86:
-                .long 0x00000
-                .long 0x000ff
-                .long 0x00800
-                .long 0x2000ff
-                .long 0x01000
-                .long 0x4000ff
-                .long 0x01800
-                .long 0x6100ff
-                .long 0x02000
-                .long 0x8100ff
-                .long 0x02900
-                .long 0xa100ff
-                .long 0x03100
-                .long 0xc200ff
-                .long 0x03900
-                .long 0xe200ff
-                .long 0x04100
-                .long 0x80000ff
-                .long 0x04a00
-                .long 0x82000ff
-                .long 0x05200
-                .long 0x84000ff
-                .long 0x05a00
-                .long 0x86100ff
-                .long 0x06200
-                .long 0x88100ff
-                .long 0x06a00
-                .long 0x8a100ff
-                .long 0x07300
-                .long 0x8c200ff
-                .long 0x07b00
-                .long 0x8e200ff
-                .long 0x08300
-                .long 0x100000ff
-                .long 0x08b00
-                .long 0x102000ff
-                .long 0x09400
-                .long 0x104000ff
-                .long 0x09c00
-                .long 0x106100ff
-                .long 0x0a400
-                .long 0x108100ff
-                .long 0x0ac00
-                .long 0x10a100ff
-                .long 0x0b400
-                .long 0x10c200ff
-                .long 0x0bd00
-                .long 0x10e200ff
-                .long 0x0c500
-                .long 0x180000ff
-                .long 0x0cd00
-                .long 0x182000ff
-                .long 0x0d500
-                .long 0x184000ff
-                .long 0x0de00
-                .long 0x186100ff
-                .long 0x0e600
-                .long 0x188100ff
-                .long 0x0ee00
-                .long 0x18a100ff
-                .long 0x0f600
-                .long 0x18c200ff
-                .long 0x0ff00
-                .long 0x18e200ff
-                .long 0x40000
-                .long 0x200000ff
-                .long 0x40800
-                .long 0x202000ff
-                .long 0x41000
-                .long 0x204000ff
-                .long 0x41800
-                .long 0x206100ff
-                .long 0x42000
-                .long 0x208100ff
-                .long 0x42900
-                .long 0x20a100ff
-                .long 0x43100
-                .long 0x20c200ff
-                .long 0x43900
-                .long 0x20e200ff
-                .long 0x44100
-                .long 0x290000ff
-                .long 0x44a00
-                .long 0x292000ff
-                .long 0x45200
-                .long 0x294000ff
-                .long 0x45a00
-                .long 0x296100ff
-                .long 0x46200
-                .long 0x298100ff
-                .long 0x46a00
-                .long 0x29a100ff
-                .long 0x47300
-                .long 0x29c200ff
-                .long 0x47b00
-                .long 0x29e200ff
-                .long 0x48300
-                .long 0x310000ff
-                .long 0x48b00
-                .long 0x312000ff
-                .long 0x49400
-                .long 0x314000ff
-                .long 0x49c00
-                .long 0x316100ff
-                .long 0x4a400
-                .long 0x318100ff
-                .long 0x4ac00
-                .long 0x31a100ff
-                .long 0x4b400
-                .long 0x31c200ff
-                .long 0x4bd00
-                .long 0x31e200ff
-                .long 0x4c500
-                .long 0x390000ff
-                .long 0x4cd00
-                .long 0x392000ff
-                .long 0x4d500
-                .long 0x394000ff
-                .long 0x4de00
-                .long 0x396100ff
-                .long 0x4e600
-                .long 0x398100ff
-                .long 0x4ee00
-                .long 0x39a100ff
-                .long 0x4f600
-                .long 0x39c200ff
-                .long 0x4ff00
-                .long 0x39e200ff
-                .long 0x80000
-                .long 0x410000ff
-                .long 0x80800
-                .long 0x412000ff
-                .long 0x81000
-                .long 0x414000ff
-                .long 0x81800
-                .long 0x416100ff
-                .long 0x82000
-                .long 0x418100ff
-                .long 0x82900
-                .long 0x41a100ff
-                .long 0x83100
-                .long 0x41c200ff
-                .long 0x83900
-                .long 0x41e200ff
-                .long 0x84100
-                .long 0x4a0000ff
-                .long 0x84a00
-                .long 0x4a2000ff
-                .long 0x85200
-                .long 0x4a4000ff
-                .long 0x85a00
-                .long 0x4a6100ff
-                .long 0x86200
-                .long 0x4a8100ff
-                .long 0x86a00
-                .long 0x4aa100ff
-                .long 0x87300
-                .long 0x4ac200ff
-                .long 0x87b00
-                .long 0x4ae200ff
-                .long 0x88300
-                .long 0x520000ff
-                .long 0x88b00
-                .long 0x522000ff
-                .long 0x89400
-                .long 0x524000ff
-                .long 0x89c00
-                .long 0x526100ff
-                .long 0x8a400
-                .long 0x528100ff
-                .long 0x8ac00
-                .long 0x52a100ff
-                .long 0x8b400
-                .long 0x52c200ff
-                .long 0x8bd00
-                .long 0x52e200ff
-                .long 0x8c500
-                .long 0x5a0000ff
-                .long 0x8cd00
-                .long 0x5a2000ff
-                .long 0x8d500
-                .long 0x5a4000ff
-                .long 0x8de00
-                .long 0x5a6100ff
-                .long 0x8e600
-                .long 0x5a8100ff
-                .long 0x8ee00
-                .long 0x5aa100ff
-                .long 0x8f600
-                .long 0x5ac200ff
-                .long 0x8ff00
-                .long 0x5ae200ff
-                .long 0xc0000
-                .long 0x620000ff
-                .long 0xc0800
-                .long 0x622000ff
-                .long 0xc1000
-                .long 0x624000ff
-                .long 0xc1800
-                .long 0x626100ff
-                .long 0xc2000
-                .long 0x628100ff
-                .long 0xc2900
-                .long 0x62a100ff
-                .long 0xc3100
-                .long 0x62c200ff
-                .long 0xc3900
-                .long 0x62e200ff
-                .long 0xc4100
-                .long 0x6a0000ff
-                .long 0xc4a00
-                .long 0x6a2000ff
-                .long 0xc5200
-                .long 0x6a4000ff
-                .long 0xc5a00
-                .long 0x6a6100ff
-                .long 0xc6200
-                .long 0x6a8100ff
-                .long 0xc6a00
-                .long 0x6aa100ff
-                .long 0xc7300
-                .long 0x6ac200ff
-                .long 0xc7b00
-                .long 0x6ae200ff
-                .long 0xc8300
-                .long 0x730000ff
-                .long 0xc8b00
-                .long 0x732000ff
-                .long 0xc9400
-                .long 0x734000ff
-                .long 0xc9c00
-                .long 0x736100ff
-                .long 0xca400
-                .long 0x738100ff
-                .long 0xcac00
-                .long 0x73a100ff
-                .long 0xcb400
-                .long 0x73c200ff
-                .long 0xcbd00
-                .long 0x73e200ff
-                .long 0xcc500
-                .long 0x7b0000ff
-                .long 0xccd00
-                .long 0x7b2000ff
-                .long 0xcd500
-                .long 0x7b4000ff
-                .long 0xcde00
-                .long 0x7b6100ff
-                .long 0xce600
-                .long 0x7b8100ff
-                .long 0xcee00
-                .long 0x7ba100ff
-                .long 0xcf600
-                .long 0x7bc200ff
-                .long 0xcff00
-                .long 0x7be200ff
-                .long 0x100000
-                .long 0x830000ff
-                .long 0x100800
-                .long 0x832000ff
-                .long 0x101000
-                .long 0x834000ff
-                .long 0x101800
-                .long 0x836100ff
-                .long 0x102000
-                .long 0x838100ff
-                .long 0x102900
-                .long 0x83a100ff
-                .long 0x103100
-                .long 0x83c200ff
-                .long 0x103900
-                .long 0x83e200ff
-                .long 0x104100
-                .long 0x8b0000ff
-                .long 0x104a00
-                .long 0x8b2000ff
-                .long 0x105200
-                .long 0x8b4000ff
-                .long 0x105a00
-                .long 0x8b6100ff
-                .long 0x106200
-                .long 0x8b8100ff
-                .long 0x106a00
-                .long 0x8ba100ff
-                .long 0x107300
-                .long 0x8bc200ff
-                .long 0x107b00
-                .long 0x8be200ff
-                .long 0x108300
-                .long 0x940000ff
-                .long 0x108b00
-                .long 0x942000ff
-                .long 0x109400
-                .long 0x944000ff
-                .long 0x109c00
-                .long 0x946100ff
-                .long 0x10a400
-                .long 0x948100ff
-                .long 0x10ac00
-                .long 0x94a100ff
-                .long 0x10b400
-                .long 0x94c200ff
-                .long 0x10bd00
-                .long 0x94e200ff
-                .long 0x10c500
-                .long 0x9c0000ff
-                .long 0x10cd00
-                .long 0x9c2000ff
-                .long 0x10d500
-                .long 0x9c4000ff
-                .long 0x10de00
-                .long 0x9c6100ff
-                .long 0x10e600
-                .long 0x9c8100ff
-                .long 0x10ee00
-                .long 0x9ca100ff
-                .long 0x10f600
-                .long 0x9cc200ff
-                .long 0x10ff00
-                .long 0x9ce200ff
-                .long 0x140000
-                .long 0xa40000ff
-                .long 0x140800
-                .long 0xa42000ff
-                .long 0x141000
-                .long 0xa44000ff
-                .long 0x141800
-                .long 0xa46100ff
-                .long 0x142000
-                .long 0xa48100ff
-                .long 0x142900
-                .long 0xa4a100ff
-                .long 0x143100
-                .long 0xa4c200ff
-                .long 0x143900
-                .long 0xa4e200ff
-                .long 0x144100
-                .long 0xac0000ff
-                .long 0x144a00
-                .long 0xac2000ff
-                .long 0x145200
-                .long 0xac4000ff
-                .long 0x145a00
-                .long 0xac6100ff
-                .long 0x146200
-                .long 0xac8100ff
-                .long 0x146a00
-                .long 0xaca100ff
-                .long 0x147300
-                .long 0xacc200ff
-                .long 0x147b00
-                .long 0xace200ff
-                .long 0x148300
-                .long 0xb40000ff
-                .long 0x148b00
-                .long 0xb42000ff
-                .long 0x149400
-                .long 0xb44000ff
-                .long 0x149c00
-                .long 0xb46100ff
-                .long 0x14a400
-                .long 0xb48100ff
-                .long 0x14ac00
-                .long 0xb4a100ff
-                .long 0x14b400
-                .long 0xb4c200ff
-                .long 0x14bd00
-                .long 0xb4e200ff
-                .long 0x14c500
-                .long 0xbd0000ff
-                .long 0x14cd00
-                .long 0xbd2000ff
-                .long 0x14d500
-                .long 0xbd4000ff
-                .long 0x14de00
-                .long 0xbd6100ff
-                .long 0x14e600
-                .long 0xbd8100ff
-                .long 0x14ee00
-                .long 0xbda100ff
-                .long 0x14f600
-                .long 0xbdc200ff
-                .long 0x14ff00
-                .long 0xbde200ff
-                .long 0x180000
-                .long 0xc50000ff
-                .long 0x180800
-                .long 0xc52000ff
-                .long 0x181000
-                .long 0xc54000ff
-                .long 0x181800
-                .long 0xc56100ff
-                .long 0x182000
-                .long 0xc58100ff
-                .long 0x182900
-                .long 0xc5a100ff
-                .long 0x183100
-                .long 0xc5c200ff
-                .long 0x183900
-                .long 0xc5e200ff
-                .long 0x184100
-                .long 0xcd0000ff
-                .long 0x184a00
-                .long 0xcd2000ff
-                .long 0x185200
-                .long 0xcd4000ff
-                .long 0x185a00
-                .long 0xcd6100ff
-                .long 0x186200
-                .long 0xcd8100ff
-                .long 0x186a00
-                .long 0xcda100ff
-                .long 0x187300
-                .long 0xcdc200ff
-                .long 0x187b00
-                .long 0xcde200ff
-                .long 0x188300
-                .long 0xd50000ff
-                .long 0x188b00
-                .long 0xd52000ff
-                .long 0x189400
-                .long 0xd54000ff
-                .long 0x189c00
-                .long 0xd56100ff
-                .long 0x18a400
-                .long 0xd58100ff
-                .long 0x18ac00
-                .long 0xd5a100ff
-                .long 0x18b400
-                .long 0xd5c200ff
-                .long 0x18bd00
-                .long 0xd5e200ff
-                .long 0x18c500
-                .long 0xde0000ff
-                .long 0x18cd00
-                .long 0xde2000ff
-                .long 0x18d500
-                .long 0xde4000ff
-                .long 0x18de00
-                .long 0xde6100ff
-                .long 0x18e600
-                .long 0xde8100ff
-                .long 0x18ee00
-                .long 0xdea100ff
-                .long 0x18f600
-                .long 0xdec200ff
-                .long 0x18ff00
-                .long 0xdee200ff
-                .long 0x1c0000
-                .long 0xe60000ff
-                .long 0x1c0800
-                .long 0xe62000ff
-                .long 0x1c1000
-                .long 0xe64000ff
-                .long 0x1c1800
-                .long 0xe66100ff
-                .long 0x1c2000
-                .long 0xe68100ff
-                .long 0x1c2900
-                .long 0xe6a100ff
-                .long 0x1c3100
-                .long 0xe6c200ff
-                .long 0x1c3900
-                .long 0xe6e200ff
-                .long 0x1c4100
-                .long 0xee0000ff
-                .long 0x1c4a00
-                .long 0xee2000ff
-                .long 0x1c5200
-                .long 0xee4000ff
-                .long 0x1c5a00
-                .long 0xee6100ff
-                .long 0x1c6200
-                .long 0xee8100ff
-                .long 0x1c6a00
-                .long 0xeea100ff
-                .long 0x1c7300
-                .long 0xeec200ff
-                .long 0x1c7b00
-                .long 0xeee200ff
-                .long 0x1c8300
-                .long 0xf60000ff
-                .long 0x1c8b00
-                .long 0xf62000ff
-                .long 0x1c9400
-                .long 0xf64000ff
-                .long 0x1c9c00
-                .long 0xf66100ff
-                .long 0x1ca400
-                .long 0xf68100ff
-                .long 0x1cac00
-                .long 0xf6a100ff
-                .long 0x1cb400
-                .long 0xf6c200ff
-                .long 0x1cbd00
-                .long 0xf6e200ff
-                .long 0x1cc500
-                .long 0xff0000ff
-                .long 0x1ccd00
-                .long 0xff2000ff
-                .long 0x1cd500
-                .long 0xff4000ff
-                .long 0x1cde00
-                .long 0xff6100ff
-                .long 0x1ce600
-                .long 0xff8100ff
-                .long 0x1cee00
-                .long 0xffa100ff
-                .long 0x1cf600
-                .long 0xffc200ff
-                .long 0x1cff00
-                .long 0xffe200ff
-
-_ConvertX86p16_32BGRA888_LUT_X86:
-                .long 0x00000
-                .long 0x000ff
-                .long 0x8000000
-                .long 0x2000ff
-                .long 0x10000000
-                .long 0x4000ff
-                .long 0x18000000
-                .long 0x6100ff
-                .long 0x20000000
-                .long 0x8100ff
-                .long 0x29000000
-                .long 0xa100ff
-                .long 0x31000000
-                .long 0xc200ff
-                .long 0x39000000
-                .long 0xe200ff
-                .long 0x41000000
-                .long 0x008ff
-                .long 0x4a000000
-                .long 0x2008ff
-                .long 0x52000000
-                .long 0x4008ff
-                .long 0x5a000000
-                .long 0x6108ff
-                .long 0x62000000
-                .long 0x8108ff
-                .long 0x6a000000
-                .long 0xa108ff
-                .long 0x73000000
-                .long 0xc208ff
-                .long 0x7b000000
-                .long 0xe208ff
-                .long 0x83000000
-                .long 0x010ff
-                .long 0x8b000000
-                .long 0x2010ff
-                .long 0x94000000
-                .long 0x4010ff
-                .long 0x9c000000
-                .long 0x6110ff
-                .long 0xa4000000
-                .long 0x8110ff
-                .long 0xac000000
-                .long 0xa110ff
-                .long 0xb4000000
-                .long 0xc210ff
-                .long 0xbd000000
-                .long 0xe210ff
-                .long 0xc5000000
-                .long 0x018ff
-                .long 0xcd000000
-                .long 0x2018ff
-                .long 0xd5000000
-                .long 0x4018ff
-                .long 0xde000000
-                .long 0x6118ff
-                .long 0xe6000000
-                .long 0x8118ff
-                .long 0xee000000
-                .long 0xa118ff
-                .long 0xf6000000
-                .long 0xc218ff
-                .long 0xff000000
-                .long 0xe218ff
-                .long 0x40000
-                .long 0x020ff
-                .long 0x8040000
-                .long 0x2020ff
-                .long 0x10040000
-                .long 0x4020ff
-                .long 0x18040000
-                .long 0x6120ff
-                .long 0x20040000
-                .long 0x8120ff
-                .long 0x29040000
-                .long 0xa120ff
-                .long 0x31040000
-                .long 0xc220ff
-                .long 0x39040000
-                .long 0xe220ff
-                .long 0x41040000
-                .long 0x029ff
-                .long 0x4a040000
-                .long 0x2029ff
-                .long 0x52040000
-                .long 0x4029ff
-                .long 0x5a040000
-                .long 0x6129ff
-                .long 0x62040000
-                .long 0x8129ff
-                .long 0x6a040000
-                .long 0xa129ff
-                .long 0x73040000
-                .long 0xc229ff
-                .long 0x7b040000
-                .long 0xe229ff
-                .long 0x83040000
-                .long 0x031ff
-                .long 0x8b040000
-                .long 0x2031ff
-                .long 0x94040000
-                .long 0x4031ff
-                .long 0x9c040000
-                .long 0x6131ff
-                .long 0xa4040000
-                .long 0x8131ff
-                .long 0xac040000
-                .long 0xa131ff
-                .long 0xb4040000
-                .long 0xc231ff
-                .long 0xbd040000
-                .long 0xe231ff
-                .long 0xc5040000
-                .long 0x039ff
-                .long 0xcd040000
-                .long 0x2039ff
-                .long 0xd5040000
-                .long 0x4039ff
-                .long 0xde040000
-                .long 0x6139ff
-                .long 0xe6040000
-                .long 0x8139ff
-                .long 0xee040000
-                .long 0xa139ff
-                .long 0xf6040000
-                .long 0xc239ff
-                .long 0xff040000
-                .long 0xe239ff
-                .long 0x80000
-                .long 0x041ff
-                .long 0x8080000
-                .long 0x2041ff
-                .long 0x10080000
-                .long 0x4041ff
-                .long 0x18080000
-                .long 0x6141ff
-                .long 0x20080000
-                .long 0x8141ff
-                .long 0x29080000
-                .long 0xa141ff
-                .long 0x31080000
-                .long 0xc241ff
-                .long 0x39080000
-                .long 0xe241ff
-                .long 0x41080000
-                .long 0x04aff
-                .long 0x4a080000
-                .long 0x204aff
-                .long 0x52080000
-                .long 0x404aff
-                .long 0x5a080000
-                .long 0x614aff
-                .long 0x62080000
-                .long 0x814aff
-                .long 0x6a080000
-                .long 0xa14aff
-                .long 0x73080000
-                .long 0xc24aff
-                .long 0x7b080000
-                .long 0xe24aff
-                .long 0x83080000
-                .long 0x052ff
-                .long 0x8b080000
-                .long 0x2052ff
-                .long 0x94080000
-                .long 0x4052ff
-                .long 0x9c080000
-                .long 0x6152ff
-                .long 0xa4080000
-                .long 0x8152ff
-                .long 0xac080000
-                .long 0xa152ff
-                .long 0xb4080000
-                .long 0xc252ff
-                .long 0xbd080000
-                .long 0xe252ff
-                .long 0xc5080000
-                .long 0x05aff
-                .long 0xcd080000
-                .long 0x205aff
-                .long 0xd5080000
-                .long 0x405aff
-                .long 0xde080000
-                .long 0x615aff
-                .long 0xe6080000
-                .long 0x815aff
-                .long 0xee080000
-                .long 0xa15aff
-                .long 0xf6080000
-                .long 0xc25aff
-                .long 0xff080000
-                .long 0xe25aff
-                .long 0xc0000
-                .long 0x062ff
-                .long 0x80c0000
-                .long 0x2062ff
-                .long 0x100c0000
-                .long 0x4062ff
-                .long 0x180c0000
-                .long 0x6162ff
-                .long 0x200c0000
-                .long 0x8162ff
-                .long 0x290c0000
-                .long 0xa162ff
-                .long 0x310c0000
-                .long 0xc262ff
-                .long 0x390c0000
-                .long 0xe262ff
-                .long 0x410c0000
-                .long 0x06aff
-                .long 0x4a0c0000
-                .long 0x206aff
-                .long 0x520c0000
-                .long 0x406aff
-                .long 0x5a0c0000
-                .long 0x616aff
-                .long 0x620c0000
-                .long 0x816aff
-                .long 0x6a0c0000
-                .long 0xa16aff
-                .long 0x730c0000
-                .long 0xc26aff
-                .long 0x7b0c0000
-                .long 0xe26aff
-                .long 0x830c0000
-                .long 0x073ff
-                .long 0x8b0c0000
-                .long 0x2073ff
-                .long 0x940c0000
-                .long 0x4073ff
-                .long 0x9c0c0000
-                .long 0x6173ff
-                .long 0xa40c0000
-                .long 0x8173ff
-                .long 0xac0c0000
-                .long 0xa173ff
-                .long 0xb40c0000
-                .long 0xc273ff
-                .long 0xbd0c0000
-                .long 0xe273ff
-                .long 0xc50c0000
-                .long 0x07bff
-                .long 0xcd0c0000
-                .long 0x207bff
-                .long 0xd50c0000
-                .long 0x407bff
-                .long 0xde0c0000
-                .long 0x617bff
-                .long 0xe60c0000
-                .long 0x817bff
-                .long 0xee0c0000
-                .long 0xa17bff
-                .long 0xf60c0000
-                .long 0xc27bff
-                .long 0xff0c0000
-                .long 0xe27bff
-                .long 0x100000
-                .long 0x083ff
-                .long 0x8100000
-                .long 0x2083ff
-                .long 0x10100000
-                .long 0x4083ff
-                .long 0x18100000
-                .long 0x6183ff
-                .long 0x20100000
-                .long 0x8183ff
-                .long 0x29100000
-                .long 0xa183ff
-                .long 0x31100000
-                .long 0xc283ff
-                .long 0x39100000
-                .long 0xe283ff
-                .long 0x41100000
-                .long 0x08bff
-                .long 0x4a100000
-                .long 0x208bff
-                .long 0x52100000
-                .long 0x408bff
-                .long 0x5a100000
-                .long 0x618bff
-                .long 0x62100000
-                .long 0x818bff
-                .long 0x6a100000
-                .long 0xa18bff
-                .long 0x73100000
-                .long 0xc28bff
-                .long 0x7b100000
-                .long 0xe28bff
-                .long 0x83100000
-                .long 0x094ff
-                .long 0x8b100000
-                .long 0x2094ff
-                .long 0x94100000
-                .long 0x4094ff
-                .long 0x9c100000
-                .long 0x6194ff
-                .long 0xa4100000
-                .long 0x8194ff
-                .long 0xac100000
-                .long 0xa194ff
-                .long 0xb4100000
-                .long 0xc294ff
-                .long 0xbd100000
-                .long 0xe294ff
-                .long 0xc5100000
-                .long 0x09cff
-                .long 0xcd100000
-                .long 0x209cff
-                .long 0xd5100000
-                .long 0x409cff
-                .long 0xde100000
-                .long 0x619cff
-                .long 0xe6100000
-                .long 0x819cff
-                .long 0xee100000
-                .long 0xa19cff
-                .long 0xf6100000
-                .long 0xc29cff
-                .long 0xff100000
-                .long 0xe29cff
-                .long 0x140000
-                .long 0x0a4ff
-                .long 0x8140000
-                .long 0x20a4ff
-                .long 0x10140000
-                .long 0x40a4ff
-                .long 0x18140000
-                .long 0x61a4ff
-                .long 0x20140000
-                .long 0x81a4ff
-                .long 0x29140000
-                .long 0xa1a4ff
-                .long 0x31140000
-                .long 0xc2a4ff
-                .long 0x39140000
-                .long 0xe2a4ff
-                .long 0x41140000
-                .long 0x0acff
-                .long 0x4a140000
-                .long 0x20acff
-                .long 0x52140000
-                .long 0x40acff
-                .long 0x5a140000
-                .long 0x61acff
-                .long 0x62140000
-                .long 0x81acff
-                .long 0x6a140000
-                .long 0xa1acff
-                .long 0x73140000
-                .long 0xc2acff
-                .long 0x7b140000
-                .long 0xe2acff
-                .long 0x83140000
-                .long 0x0b4ff
-                .long 0x8b140000
-                .long 0x20b4ff
-                .long 0x94140000
-                .long 0x40b4ff
-                .long 0x9c140000
-                .long 0x61b4ff
-                .long 0xa4140000
-                .long 0x81b4ff
-                .long 0xac140000
-                .long 0xa1b4ff
-                .long 0xb4140000
-                .long 0xc2b4ff
-                .long 0xbd140000
-                .long 0xe2b4ff
-                .long 0xc5140000
-                .long 0x0bdff
-                .long 0xcd140000
-                .long 0x20bdff
-                .long 0xd5140000
-                .long 0x40bdff
-                .long 0xde140000
-                .long 0x61bdff
-                .long 0xe6140000
-                .long 0x81bdff
-                .long 0xee140000
-                .long 0xa1bdff
-                .long 0xf6140000
-                .long 0xc2bdff
-                .long 0xff140000
-                .long 0xe2bdff
-                .long 0x180000
-                .long 0x0c5ff
-                .long 0x8180000
-                .long 0x20c5ff
-                .long 0x10180000
-                .long 0x40c5ff
-                .long 0x18180000
-                .long 0x61c5ff
-                .long 0x20180000
-                .long 0x81c5ff
-                .long 0x29180000
-                .long 0xa1c5ff
-                .long 0x31180000
-                .long 0xc2c5ff
-                .long 0x39180000
-                .long 0xe2c5ff
-                .long 0x41180000
-                .long 0x0cdff
-                .long 0x4a180000
-                .long 0x20cdff
-                .long 0x52180000
-                .long 0x40cdff
-                .long 0x5a180000
-                .long 0x61cdff
-                .long 0x62180000
-                .long 0x81cdff
-                .long 0x6a180000
-                .long 0xa1cdff
-                .long 0x73180000
-                .long 0xc2cdff
-                .long 0x7b180000
-                .long 0xe2cdff
-                .long 0x83180000
-                .long 0x0d5ff
-                .long 0x8b180000
-                .long 0x20d5ff
-                .long 0x94180000
-                .long 0x40d5ff
-                .long 0x9c180000
-                .long 0x61d5ff
-                .long 0xa4180000
-                .long 0x81d5ff
-                .long 0xac180000
-                .long 0xa1d5ff
-                .long 0xb4180000
-                .long 0xc2d5ff
-                .long 0xbd180000
-                .long 0xe2d5ff
-                .long 0xc5180000
-                .long 0x0deff
-                .long 0xcd180000
-                .long 0x20deff
-                .long 0xd5180000
-                .long 0x40deff
-                .long 0xde180000
-                .long 0x61deff
-                .long 0xe6180000
-                .long 0x81deff
-                .long 0xee180000
-                .long 0xa1deff
-                .long 0xf6180000
-                .long 0xc2deff
-                .long 0xff180000
-                .long 0xe2deff
-                .long 0x1c0000
-                .long 0x0e6ff
-                .long 0x81c0000
-                .long 0x20e6ff
-                .long 0x101c0000
-                .long 0x40e6ff
-                .long 0x181c0000
-                .long 0x61e6ff
-                .long 0x201c0000
-                .long 0x81e6ff
-                .long 0x291c0000
-                .long 0xa1e6ff
-                .long 0x311c0000
-                .long 0xc2e6ff
-                .long 0x391c0000
-                .long 0xe2e6ff
-                .long 0x411c0000
-                .long 0x0eeff
-                .long 0x4a1c0000
-                .long 0x20eeff
-                .long 0x521c0000
-                .long 0x40eeff
-                .long 0x5a1c0000
-                .long 0x61eeff
-                .long 0x621c0000
-                .long 0x81eeff
-                .long 0x6a1c0000
-                .long 0xa1eeff
-                .long 0x731c0000
-                .long 0xc2eeff
-                .long 0x7b1c0000
-                .long 0xe2eeff
-                .long 0x831c0000
-                .long 0x0f6ff
-                .long 0x8b1c0000
-                .long 0x20f6ff
-                .long 0x941c0000
-                .long 0x40f6ff
-                .long 0x9c1c0000
-                .long 0x61f6ff
-                .long 0xa41c0000
-                .long 0x81f6ff
-                .long 0xac1c0000
-                .long 0xa1f6ff
-                .long 0xb41c0000
-                .long 0xc2f6ff
-                .long 0xbd1c0000
-                .long 0xe2f6ff
-                .long 0xc51c0000
-                .long 0x0ffff
-                .long 0xcd1c0000
-                .long 0x20ffff
-                .long 0xd51c0000
-                .long 0x40ffff
-                .long 0xde1c0000
-                .long 0x61ffff
-                .long 0xe61c0000
-                .long 0x81ffff
-                .long 0xee1c0000
-                .long 0xa1ffff
-                .long 0xf61c0000
-                .long 0xc2ffff
-                .long 0xff1c0000
-                .long 0xe2ffff
-

+ 2088 - 0
packages/hermes/src/i386/x8616lut.inc

@@ -0,0 +1,2088 @@
+{
+    x86 format converters for HERMES
+    Some routines Copyright (c) 1998 Christian Nentwich ([email protected])
+    Some routines are (c) Glenn Fiedler ([email protected]), used with permission
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version
+    with the following modification:
+
+    As a special exception, the copyright holders of this library give you
+    permission to link this library with independent modules to produce an
+    executable, regardless of the license terms of these independent modules, and
+    to copy and distribute the resulting executable under terms of your choice,
+    provided that you also meet, for each linked independent module, the terms
+    and conditions of the license of that module. An independent module is a
+    module which is not derived from or based on this library. If you modify
+    this library, you may extend this exception to your version of the library,
+    but you are not obligated to do so. If you do not wish to do so, delete this
+    exception statement from your version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+}
+
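+{
+  Table layout (an assumption inferred from the Hermes x86 converter
+  routines, which are not part of this hunk): each array below holds 256
+  pairs of DWords, indexed by a source byte value i. Element 2*i is the
+  32-bit contribution of i as the low byte of an RGB565 pixel (low green
+  bits and blue); element 2*i+1 is its contribution as the high byte (red
+  and high green bits). A 16-bit pixel is then expanded with two lookups
+  and an OR, roughly:
+
+    pixel32 := LUT[2 * Lo(pixel16)] or LUT[2 * Hi(pixel16) + 1];
+
+  The four tables differ only in the output channel order they produce.
+}
+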
+const
+  ConvertX86p16_32RGB888_LUT_X86: array [0..511] of DWord = (
+                  $00000
+                , $00000
+                , $00008
+                , $02000
+                , $00010
+                , $04000
+                , $00018
+                , $06100
+                , $00020
+                , $08100
+                , $00029
+                , $0a100
+                , $00031
+                , $0c200
+                , $00039
+                , $0e200
+                , $00041
+                , $80000
+                , $0004a
+                , $82000
+                , $00052
+                , $84000
+                , $0005a
+                , $86100
+                , $00062
+                , $88100
+                , $0006a
+                , $8a100
+                , $00073
+                , $8c200
+                , $0007b
+                , $8e200
+                , $00083
+                , $100000
+                , $0008b
+                , $102000
+                , $00094
+                , $104000
+                , $0009c
+                , $106100
+                , $000a4
+                , $108100
+                , $000ac
+                , $10a100
+                , $000b4
+                , $10c200
+                , $000bd
+                , $10e200
+                , $000c5
+                , $180000
+                , $000cd
+                , $182000
+                , $000d5
+                , $184000
+                , $000de
+                , $186100
+                , $000e6
+                , $188100
+                , $000ee
+                , $18a100
+                , $000f6
+                , $18c200
+                , $000ff
+                , $18e200
+                , $00400
+                , $200000
+                , $00408
+                , $202000
+                , $00410
+                , $204000
+                , $00418
+                , $206100
+                , $00420
+                , $208100
+                , $00429
+                , $20a100
+                , $00431
+                , $20c200
+                , $00439
+                , $20e200
+                , $00441
+                , $290000
+                , $0044a
+                , $292000
+                , $00452
+                , $294000
+                , $0045a
+                , $296100
+                , $00462
+                , $298100
+                , $0046a
+                , $29a100
+                , $00473
+                , $29c200
+                , $0047b
+                , $29e200
+                , $00483
+                , $310000
+                , $0048b
+                , $312000
+                , $00494
+                , $314000
+                , $0049c
+                , $316100
+                , $004a4
+                , $318100
+                , $004ac
+                , $31a100
+                , $004b4
+                , $31c200
+                , $004bd
+                , $31e200
+                , $004c5
+                , $390000
+                , $004cd
+                , $392000
+                , $004d5
+                , $394000
+                , $004de
+                , $396100
+                , $004e6
+                , $398100
+                , $004ee
+                , $39a100
+                , $004f6
+                , $39c200
+                , $004ff
+                , $39e200
+                , $00800
+                , $410000
+                , $00808
+                , $412000
+                , $00810
+                , $414000
+                , $00818
+                , $416100
+                , $00820
+                , $418100
+                , $00829
+                , $41a100
+                , $00831
+                , $41c200
+                , $00839
+                , $41e200
+                , $00841
+                , $4a0000
+                , $0084a
+                , $4a2000
+                , $00852
+                , $4a4000
+                , $0085a
+                , $4a6100
+                , $00862
+                , $4a8100
+                , $0086a
+                , $4aa100
+                , $00873
+                , $4ac200
+                , $0087b
+                , $4ae200
+                , $00883
+                , $520000
+                , $0088b
+                , $522000
+                , $00894
+                , $524000
+                , $0089c
+                , $526100
+                , $008a4
+                , $528100
+                , $008ac
+                , $52a100
+                , $008b4
+                , $52c200
+                , $008bd
+                , $52e200
+                , $008c5
+                , $5a0000
+                , $008cd
+                , $5a2000
+                , $008d5
+                , $5a4000
+                , $008de
+                , $5a6100
+                , $008e6
+                , $5a8100
+                , $008ee
+                , $5aa100
+                , $008f6
+                , $5ac200
+                , $008ff
+                , $5ae200
+                , $00c00
+                , $620000
+                , $00c08
+                , $622000
+                , $00c10
+                , $624000
+                , $00c18
+                , $626100
+                , $00c20
+                , $628100
+                , $00c29
+                , $62a100
+                , $00c31
+                , $62c200
+                , $00c39
+                , $62e200
+                , $00c41
+                , $6a0000
+                , $00c4a
+                , $6a2000
+                , $00c52
+                , $6a4000
+                , $00c5a
+                , $6a6100
+                , $00c62
+                , $6a8100
+                , $00c6a
+                , $6aa100
+                , $00c73
+                , $6ac200
+                , $00c7b
+                , $6ae200
+                , $00c83
+                , $730000
+                , $00c8b
+                , $732000
+                , $00c94
+                , $734000
+                , $00c9c
+                , $736100
+                , $00ca4
+                , $738100
+                , $00cac
+                , $73a100
+                , $00cb4
+                , $73c200
+                , $00cbd
+                , $73e200
+                , $00cc5
+                , $7b0000
+                , $00ccd
+                , $7b2000
+                , $00cd5
+                , $7b4000
+                , $00cde
+                , $7b6100
+                , $00ce6
+                , $7b8100
+                , $00cee
+                , $7ba100
+                , $00cf6
+                , $7bc200
+                , $00cff
+                , $7be200
+                , $01000
+                , $830000
+                , $01008
+                , $832000
+                , $01010
+                , $834000
+                , $01018
+                , $836100
+                , $01020
+                , $838100
+                , $01029
+                , $83a100
+                , $01031
+                , $83c200
+                , $01039
+                , $83e200
+                , $01041
+                , $8b0000
+                , $0104a
+                , $8b2000
+                , $01052
+                , $8b4000
+                , $0105a
+                , $8b6100
+                , $01062
+                , $8b8100
+                , $0106a
+                , $8ba100
+                , $01073
+                , $8bc200
+                , $0107b
+                , $8be200
+                , $01083
+                , $940000
+                , $0108b
+                , $942000
+                , $01094
+                , $944000
+                , $0109c
+                , $946100
+                , $010a4
+                , $948100
+                , $010ac
+                , $94a100
+                , $010b4
+                , $94c200
+                , $010bd
+                , $94e200
+                , $010c5
+                , $9c0000
+                , $010cd
+                , $9c2000
+                , $010d5
+                , $9c4000
+                , $010de
+                , $9c6100
+                , $010e6
+                , $9c8100
+                , $010ee
+                , $9ca100
+                , $010f6
+                , $9cc200
+                , $010ff
+                , $9ce200
+                , $01400
+                , $a40000
+                , $01408
+                , $a42000
+                , $01410
+                , $a44000
+                , $01418
+                , $a46100
+                , $01420
+                , $a48100
+                , $01429
+                , $a4a100
+                , $01431
+                , $a4c200
+                , $01439
+                , $a4e200
+                , $01441
+                , $ac0000
+                , $0144a
+                , $ac2000
+                , $01452
+                , $ac4000
+                , $0145a
+                , $ac6100
+                , $01462
+                , $ac8100
+                , $0146a
+                , $aca100
+                , $01473
+                , $acc200
+                , $0147b
+                , $ace200
+                , $01483
+                , $b40000
+                , $0148b
+                , $b42000
+                , $01494
+                , $b44000
+                , $0149c
+                , $b46100
+                , $014a4
+                , $b48100
+                , $014ac
+                , $b4a100
+                , $014b4
+                , $b4c200
+                , $014bd
+                , $b4e200
+                , $014c5
+                , $bd0000
+                , $014cd
+                , $bd2000
+                , $014d5
+                , $bd4000
+                , $014de
+                , $bd6100
+                , $014e6
+                , $bd8100
+                , $014ee
+                , $bda100
+                , $014f6
+                , $bdc200
+                , $014ff
+                , $bde200
+                , $01800
+                , $c50000
+                , $01808
+                , $c52000
+                , $01810
+                , $c54000
+                , $01818
+                , $c56100
+                , $01820
+                , $c58100
+                , $01829
+                , $c5a100
+                , $01831
+                , $c5c200
+                , $01839
+                , $c5e200
+                , $01841
+                , $cd0000
+                , $0184a
+                , $cd2000
+                , $01852
+                , $cd4000
+                , $0185a
+                , $cd6100
+                , $01862
+                , $cd8100
+                , $0186a
+                , $cda100
+                , $01873
+                , $cdc200
+                , $0187b
+                , $cde200
+                , $01883
+                , $d50000
+                , $0188b
+                , $d52000
+                , $01894
+                , $d54000
+                , $0189c
+                , $d56100
+                , $018a4
+                , $d58100
+                , $018ac
+                , $d5a100
+                , $018b4
+                , $d5c200
+                , $018bd
+                , $d5e200
+                , $018c5
+                , $de0000
+                , $018cd
+                , $de2000
+                , $018d5
+                , $de4000
+                , $018de
+                , $de6100
+                , $018e6
+                , $de8100
+                , $018ee
+                , $dea100
+                , $018f6
+                , $dec200
+                , $018ff
+                , $dee200
+                , $01c00
+                , $e60000
+                , $01c08
+                , $e62000
+                , $01c10
+                , $e64000
+                , $01c18
+                , $e66100
+                , $01c20
+                , $e68100
+                , $01c29
+                , $e6a100
+                , $01c31
+                , $e6c200
+                , $01c39
+                , $e6e200
+                , $01c41
+                , $ee0000
+                , $01c4a
+                , $ee2000
+                , $01c52
+                , $ee4000
+                , $01c5a
+                , $ee6100
+                , $01c62
+                , $ee8100
+                , $01c6a
+                , $eea100
+                , $01c73
+                , $eec200
+                , $01c7b
+                , $eee200
+                , $01c83
+                , $f60000
+                , $01c8b
+                , $f62000
+                , $01c94
+                , $f64000
+                , $01c9c
+                , $f66100
+                , $01ca4
+                , $f68100
+                , $01cac
+                , $f6a100
+                , $01cb4
+                , $f6c200
+                , $01cbd
+                , $f6e200
+                , $01cc5
+                , $ff0000
+                , $01ccd
+                , $ff2000
+                , $01cd5
+                , $ff4000
+                , $01cde
+                , $ff6100
+                , $01ce6
+                , $ff8100
+                , $01cee
+                , $ffa100
+                , $01cf6
+                , $ffc200
+                , $01cff
+                , $ffe200);
+
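+  { As the RGB888 table above, but with the red and blue contributions
+    swapped, so the combined lookup yields BGR888 output. }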
+  ConvertX86p16_32BGR888_LUT_X86: array [0..511] of DWord = (
+                  $00000
+                , $00000
+                , $80000
+                , $02000
+                , $100000
+                , $04000
+                , $180000
+                , $06100
+                , $200000
+                , $08100
+                , $290000
+                , $0a100
+                , $310000
+                , $0c200
+                , $390000
+                , $0e200
+                , $410000
+                , $00008
+                , $4a0000
+                , $02008
+                , $520000
+                , $04008
+                , $5a0000
+                , $06108
+                , $620000
+                , $08108
+                , $6a0000
+                , $0a108
+                , $730000
+                , $0c208
+                , $7b0000
+                , $0e208
+                , $830000
+                , $00010
+                , $8b0000
+                , $02010
+                , $940000
+                , $04010
+                , $9c0000
+                , $06110
+                , $a40000
+                , $08110
+                , $ac0000
+                , $0a110
+                , $b40000
+                , $0c210
+                , $bd0000
+                , $0e210
+                , $c50000
+                , $00018
+                , $cd0000
+                , $02018
+                , $d50000
+                , $04018
+                , $de0000
+                , $06118
+                , $e60000
+                , $08118
+                , $ee0000
+                , $0a118
+                , $f60000
+                , $0c218
+                , $ff0000
+                , $0e218
+                , $00400
+                , $00020
+                , $80400
+                , $02020
+                , $100400
+                , $04020
+                , $180400
+                , $06120
+                , $200400
+                , $08120
+                , $290400
+                , $0a120
+                , $310400
+                , $0c220
+                , $390400
+                , $0e220
+                , $410400
+                , $00029
+                , $4a0400
+                , $02029
+                , $520400
+                , $04029
+                , $5a0400
+                , $06129
+                , $620400
+                , $08129
+                , $6a0400
+                , $0a129
+                , $730400
+                , $0c229
+                , $7b0400
+                , $0e229
+                , $830400
+                , $00031
+                , $8b0400
+                , $02031
+                , $940400
+                , $04031
+                , $9c0400
+                , $06131
+                , $a40400
+                , $08131
+                , $ac0400
+                , $0a131
+                , $b40400
+                , $0c231
+                , $bd0400
+                , $0e231
+                , $c50400
+                , $00039
+                , $cd0400
+                , $02039
+                , $d50400
+                , $04039
+                , $de0400
+                , $06139
+                , $e60400
+                , $08139
+                , $ee0400
+                , $0a139
+                , $f60400
+                , $0c239
+                , $ff0400
+                , $0e239
+                , $00800
+                , $00041
+                , $80800
+                , $02041
+                , $100800
+                , $04041
+                , $180800
+                , $06141
+                , $200800
+                , $08141
+                , $290800
+                , $0a141
+                , $310800
+                , $0c241
+                , $390800
+                , $0e241
+                , $410800
+                , $0004a
+                , $4a0800
+                , $0204a
+                , $520800
+                , $0404a
+                , $5a0800
+                , $0614a
+                , $620800
+                , $0814a
+                , $6a0800
+                , $0a14a
+                , $730800
+                , $0c24a
+                , $7b0800
+                , $0e24a
+                , $830800
+                , $00052
+                , $8b0800
+                , $02052
+                , $940800
+                , $04052
+                , $9c0800
+                , $06152
+                , $a40800
+                , $08152
+                , $ac0800
+                , $0a152
+                , $b40800
+                , $0c252
+                , $bd0800
+                , $0e252
+                , $c50800
+                , $0005a
+                , $cd0800
+                , $0205a
+                , $d50800
+                , $0405a
+                , $de0800
+                , $0615a
+                , $e60800
+                , $0815a
+                , $ee0800
+                , $0a15a
+                , $f60800
+                , $0c25a
+                , $ff0800
+                , $0e25a
+                , $00c00
+                , $00062
+                , $80c00
+                , $02062
+                , $100c00
+                , $04062
+                , $180c00
+                , $06162
+                , $200c00
+                , $08162
+                , $290c00
+                , $0a162
+                , $310c00
+                , $0c262
+                , $390c00
+                , $0e262
+                , $410c00
+                , $0006a
+                , $4a0c00
+                , $0206a
+                , $520c00
+                , $0406a
+                , $5a0c00
+                , $0616a
+                , $620c00
+                , $0816a
+                , $6a0c00
+                , $0a16a
+                , $730c00
+                , $0c26a
+                , $7b0c00
+                , $0e26a
+                , $830c00
+                , $00073
+                , $8b0c00
+                , $02073
+                , $940c00
+                , $04073
+                , $9c0c00
+                , $06173
+                , $a40c00
+                , $08173
+                , $ac0c00
+                , $0a173
+                , $b40c00
+                , $0c273
+                , $bd0c00
+                , $0e273
+                , $c50c00
+                , $0007b
+                , $cd0c00
+                , $0207b
+                , $d50c00
+                , $0407b
+                , $de0c00
+                , $0617b
+                , $e60c00
+                , $0817b
+                , $ee0c00
+                , $0a17b
+                , $f60c00
+                , $0c27b
+                , $ff0c00
+                , $0e27b
+                , $01000
+                , $00083
+                , $81000
+                , $02083
+                , $101000
+                , $04083
+                , $181000
+                , $06183
+                , $201000
+                , $08183
+                , $291000
+                , $0a183
+                , $311000
+                , $0c283
+                , $391000
+                , $0e283
+                , $411000
+                , $0008b
+                , $4a1000
+                , $0208b
+                , $521000
+                , $0408b
+                , $5a1000
+                , $0618b
+                , $621000
+                , $0818b
+                , $6a1000
+                , $0a18b
+                , $731000
+                , $0c28b
+                , $7b1000
+                , $0e28b
+                , $831000
+                , $00094
+                , $8b1000
+                , $02094
+                , $941000
+                , $04094
+                , $9c1000
+                , $06194
+                , $a41000
+                , $08194
+                , $ac1000
+                , $0a194
+                , $b41000
+                , $0c294
+                , $bd1000
+                , $0e294
+                , $c51000
+                , $0009c
+                , $cd1000
+                , $0209c
+                , $d51000
+                , $0409c
+                , $de1000
+                , $0619c
+                , $e61000
+                , $0819c
+                , $ee1000
+                , $0a19c
+                , $f61000
+                , $0c29c
+                , $ff1000
+                , $0e29c
+                , $01400
+                , $000a4
+                , $81400
+                , $020a4
+                , $101400
+                , $040a4
+                , $181400
+                , $061a4
+                , $201400
+                , $081a4
+                , $291400
+                , $0a1a4
+                , $311400
+                , $0c2a4
+                , $391400
+                , $0e2a4
+                , $411400
+                , $000ac
+                , $4a1400
+                , $020ac
+                , $521400
+                , $040ac
+                , $5a1400
+                , $061ac
+                , $621400
+                , $081ac
+                , $6a1400
+                , $0a1ac
+                , $731400
+                , $0c2ac
+                , $7b1400
+                , $0e2ac
+                , $831400
+                , $000b4
+                , $8b1400
+                , $020b4
+                , $941400
+                , $040b4
+                , $9c1400
+                , $061b4
+                , $a41400
+                , $081b4
+                , $ac1400
+                , $0a1b4
+                , $b41400
+                , $0c2b4
+                , $bd1400
+                , $0e2b4
+                , $c51400
+                , $000bd
+                , $cd1400
+                , $020bd
+                , $d51400
+                , $040bd
+                , $de1400
+                , $061bd
+                , $e61400
+                , $081bd
+                , $ee1400
+                , $0a1bd
+                , $f61400
+                , $0c2bd
+                , $ff1400
+                , $0e2bd
+                , $01800
+                , $000c5
+                , $81800
+                , $020c5
+                , $101800
+                , $040c5
+                , $181800
+                , $061c5
+                , $201800
+                , $081c5
+                , $291800
+                , $0a1c5
+                , $311800
+                , $0c2c5
+                , $391800
+                , $0e2c5
+                , $411800
+                , $000cd
+                , $4a1800
+                , $020cd
+                , $521800
+                , $040cd
+                , $5a1800
+                , $061cd
+                , $621800
+                , $081cd
+                , $6a1800
+                , $0a1cd
+                , $731800
+                , $0c2cd
+                , $7b1800
+                , $0e2cd
+                , $831800
+                , $000d5
+                , $8b1800
+                , $020d5
+                , $941800
+                , $040d5
+                , $9c1800
+                , $061d5
+                , $a41800
+                , $081d5
+                , $ac1800
+                , $0a1d5
+                , $b41800
+                , $0c2d5
+                , $bd1800
+                , $0e2d5
+                , $c51800
+                , $000de
+                , $cd1800
+                , $020de
+                , $d51800
+                , $040de
+                , $de1800
+                , $061de
+                , $e61800
+                , $081de
+                , $ee1800
+                , $0a1de
+                , $f61800
+                , $0c2de
+                , $ff1800
+                , $0e2de
+                , $01c00
+                , $000e6
+                , $81c00
+                , $020e6
+                , $101c00
+                , $040e6
+                , $181c00
+                , $061e6
+                , $201c00
+                , $081e6
+                , $291c00
+                , $0a1e6
+                , $311c00
+                , $0c2e6
+                , $391c00
+                , $0e2e6
+                , $411c00
+                , $000ee
+                , $4a1c00
+                , $020ee
+                , $521c00
+                , $040ee
+                , $5a1c00
+                , $061ee
+                , $621c00
+                , $081ee
+                , $6a1c00
+                , $0a1ee
+                , $731c00
+                , $0c2ee
+                , $7b1c00
+                , $0e2ee
+                , $831c00
+                , $000f6
+                , $8b1c00
+                , $020f6
+                , $941c00
+                , $040f6
+                , $9c1c00
+                , $061f6
+                , $a41c00
+                , $081f6
+                , $ac1c00
+                , $0a1f6
+                , $b41c00
+                , $0c2f6
+                , $bd1c00
+                , $0e2f6
+                , $c51c00
+                , $000ff
+                , $cd1c00
+                , $020ff
+                , $d51c00
+                , $040ff
+                , $de1c00
+                , $061ff
+                , $e61c00
+                , $081ff
+                , $ee1c00
+                , $0a1ff
+                , $f61c00
+                , $0c2ff
+                , $ff1c00
+                , $0e2ff);
+
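+  { Same pair layout, with every channel contribution shifted up 8 bits and
+    an opaque alpha byte ($ff) baked into the high-byte (odd) entries. }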
+  ConvertX86p16_32RGBA888_LUT_X86: array [0..511] of DWord = (
+                  $00000
+                , $000ff
+                , $00800
+                , $2000ff
+                , $01000
+                , $4000ff
+                , $01800
+                , $6100ff
+                , $02000
+                , $8100ff
+                , $02900
+                , $a100ff
+                , $03100
+                , $c200ff
+                , $03900
+                , $e200ff
+                , $04100
+                , $80000ff
+                , $04a00
+                , $82000ff
+                , $05200
+                , $84000ff
+                , $05a00
+                , $86100ff
+                , $06200
+                , $88100ff
+                , $06a00
+                , $8a100ff
+                , $07300
+                , $8c200ff
+                , $07b00
+                , $8e200ff
+                , $08300
+                , $100000ff
+                , $08b00
+                , $102000ff
+                , $09400
+                , $104000ff
+                , $09c00
+                , $106100ff
+                , $0a400
+                , $108100ff
+                , $0ac00
+                , $10a100ff
+                , $0b400
+                , $10c200ff
+                , $0bd00
+                , $10e200ff
+                , $0c500
+                , $180000ff
+                , $0cd00
+                , $182000ff
+                , $0d500
+                , $184000ff
+                , $0de00
+                , $186100ff
+                , $0e600
+                , $188100ff
+                , $0ee00
+                , $18a100ff
+                , $0f600
+                , $18c200ff
+                , $0ff00
+                , $18e200ff
+                , $40000
+                , $200000ff
+                , $40800
+                , $202000ff
+                , $41000
+                , $204000ff
+                , $41800
+                , $206100ff
+                , $42000
+                , $208100ff
+                , $42900
+                , $20a100ff
+                , $43100
+                , $20c200ff
+                , $43900
+                , $20e200ff
+                , $44100
+                , $290000ff
+                , $44a00
+                , $292000ff
+                , $45200
+                , $294000ff
+                , $45a00
+                , $296100ff
+                , $46200
+                , $298100ff
+                , $46a00
+                , $29a100ff
+                , $47300
+                , $29c200ff
+                , $47b00
+                , $29e200ff
+                , $48300
+                , $310000ff
+                , $48b00
+                , $312000ff
+                , $49400
+                , $314000ff
+                , $49c00
+                , $316100ff
+                , $4a400
+                , $318100ff
+                , $4ac00
+                , $31a100ff
+                , $4b400
+                , $31c200ff
+                , $4bd00
+                , $31e200ff
+                , $4c500
+                , $390000ff
+                , $4cd00
+                , $392000ff
+                , $4d500
+                , $394000ff
+                , $4de00
+                , $396100ff
+                , $4e600
+                , $398100ff
+                , $4ee00
+                , $39a100ff
+                , $4f600
+                , $39c200ff
+                , $4ff00
+                , $39e200ff
+                , $80000
+                , $410000ff
+                , $80800
+                , $412000ff
+                , $81000
+                , $414000ff
+                , $81800
+                , $416100ff
+                , $82000
+                , $418100ff
+                , $82900
+                , $41a100ff
+                , $83100
+                , $41c200ff
+                , $83900
+                , $41e200ff
+                , $84100
+                , $4a0000ff
+                , $84a00
+                , $4a2000ff
+                , $85200
+                , $4a4000ff
+                , $85a00
+                , $4a6100ff
+                , $86200
+                , $4a8100ff
+                , $86a00
+                , $4aa100ff
+                , $87300
+                , $4ac200ff
+                , $87b00
+                , $4ae200ff
+                , $88300
+                , $520000ff
+                , $88b00
+                , $522000ff
+                , $89400
+                , $524000ff
+                , $89c00
+                , $526100ff
+                , $8a400
+                , $528100ff
+                , $8ac00
+                , $52a100ff
+                , $8b400
+                , $52c200ff
+                , $8bd00
+                , $52e200ff
+                , $8c500
+                , $5a0000ff
+                , $8cd00
+                , $5a2000ff
+                , $8d500
+                , $5a4000ff
+                , $8de00
+                , $5a6100ff
+                , $8e600
+                , $5a8100ff
+                , $8ee00
+                , $5aa100ff
+                , $8f600
+                , $5ac200ff
+                , $8ff00
+                , $5ae200ff
+                , $c0000
+                , $620000ff
+                , $c0800
+                , $622000ff
+                , $c1000
+                , $624000ff
+                , $c1800
+                , $626100ff
+                , $c2000
+                , $628100ff
+                , $c2900
+                , $62a100ff
+                , $c3100
+                , $62c200ff
+                , $c3900
+                , $62e200ff
+                , $c4100
+                , $6a0000ff
+                , $c4a00
+                , $6a2000ff
+                , $c5200
+                , $6a4000ff
+                , $c5a00
+                , $6a6100ff
+                , $c6200
+                , $6a8100ff
+                , $c6a00
+                , $6aa100ff
+                , $c7300
+                , $6ac200ff
+                , $c7b00
+                , $6ae200ff
+                , $c8300
+                , $730000ff
+                , $c8b00
+                , $732000ff
+                , $c9400
+                , $734000ff
+                , $c9c00
+                , $736100ff
+                , $ca400
+                , $738100ff
+                , $cac00
+                , $73a100ff
+                , $cb400
+                , $73c200ff
+                , $cbd00
+                , $73e200ff
+                , $cc500
+                , $7b0000ff
+                , $ccd00
+                , $7b2000ff
+                , $cd500
+                , $7b4000ff
+                , $cde00
+                , $7b6100ff
+                , $ce600
+                , $7b8100ff
+                , $cee00
+                , $7ba100ff
+                , $cf600
+                , $7bc200ff
+                , $cff00
+                , $7be200ff
+                , $100000
+                , $830000ff
+                , $100800
+                , $832000ff
+                , $101000
+                , $834000ff
+                , $101800
+                , $836100ff
+                , $102000
+                , $838100ff
+                , $102900
+                , $83a100ff
+                , $103100
+                , $83c200ff
+                , $103900
+                , $83e200ff
+                , $104100
+                , $8b0000ff
+                , $104a00
+                , $8b2000ff
+                , $105200
+                , $8b4000ff
+                , $105a00
+                , $8b6100ff
+                , $106200
+                , $8b8100ff
+                , $106a00
+                , $8ba100ff
+                , $107300
+                , $8bc200ff
+                , $107b00
+                , $8be200ff
+                , $108300
+                , $940000ff
+                , $108b00
+                , $942000ff
+                , $109400
+                , $944000ff
+                , $109c00
+                , $946100ff
+                , $10a400
+                , $948100ff
+                , $10ac00
+                , $94a100ff
+                , $10b400
+                , $94c200ff
+                , $10bd00
+                , $94e200ff
+                , $10c500
+                , $9c0000ff
+                , $10cd00
+                , $9c2000ff
+                , $10d500
+                , $9c4000ff
+                , $10de00
+                , $9c6100ff
+                , $10e600
+                , $9c8100ff
+                , $10ee00
+                , $9ca100ff
+                , $10f600
+                , $9cc200ff
+                , $10ff00
+                , $9ce200ff
+                , $140000
+                , $a40000ff
+                , $140800
+                , $a42000ff
+                , $141000
+                , $a44000ff
+                , $141800
+                , $a46100ff
+                , $142000
+                , $a48100ff
+                , $142900
+                , $a4a100ff
+                , $143100
+                , $a4c200ff
+                , $143900
+                , $a4e200ff
+                , $144100
+                , $ac0000ff
+                , $144a00
+                , $ac2000ff
+                , $145200
+                , $ac4000ff
+                , $145a00
+                , $ac6100ff
+                , $146200
+                , $ac8100ff
+                , $146a00
+                , $aca100ff
+                , $147300
+                , $acc200ff
+                , $147b00
+                , $ace200ff
+                , $148300
+                , $b40000ff
+                , $148b00
+                , $b42000ff
+                , $149400
+                , $b44000ff
+                , $149c00
+                , $b46100ff
+                , $14a400
+                , $b48100ff
+                , $14ac00
+                , $b4a100ff
+                , $14b400
+                , $b4c200ff
+                , $14bd00
+                , $b4e200ff
+                , $14c500
+                , $bd0000ff
+                , $14cd00
+                , $bd2000ff
+                , $14d500
+                , $bd4000ff
+                , $14de00
+                , $bd6100ff
+                , $14e600
+                , $bd8100ff
+                , $14ee00
+                , $bda100ff
+                , $14f600
+                , $bdc200ff
+                , $14ff00
+                , $bde200ff
+                , $180000
+                , $c50000ff
+                , $180800
+                , $c52000ff
+                , $181000
+                , $c54000ff
+                , $181800
+                , $c56100ff
+                , $182000
+                , $c58100ff
+                , $182900
+                , $c5a100ff
+                , $183100
+                , $c5c200ff
+                , $183900
+                , $c5e200ff
+                , $184100
+                , $cd0000ff
+                , $184a00
+                , $cd2000ff
+                , $185200
+                , $cd4000ff
+                , $185a00
+                , $cd6100ff
+                , $186200
+                , $cd8100ff
+                , $186a00
+                , $cda100ff
+                , $187300
+                , $cdc200ff
+                , $187b00
+                , $cde200ff
+                , $188300
+                , $d50000ff
+                , $188b00
+                , $d52000ff
+                , $189400
+                , $d54000ff
+                , $189c00
+                , $d56100ff
+                , $18a400
+                , $d58100ff
+                , $18ac00
+                , $d5a100ff
+                , $18b400
+                , $d5c200ff
+                , $18bd00
+                , $d5e200ff
+                , $18c500
+                , $de0000ff
+                , $18cd00
+                , $de2000ff
+                , $18d500
+                , $de4000ff
+                , $18de00
+                , $de6100ff
+                , $18e600
+                , $de8100ff
+                , $18ee00
+                , $dea100ff
+                , $18f600
+                , $dec200ff
+                , $18ff00
+                , $dee200ff
+                , $1c0000
+                , $e60000ff
+                , $1c0800
+                , $e62000ff
+                , $1c1000
+                , $e64000ff
+                , $1c1800
+                , $e66100ff
+                , $1c2000
+                , $e68100ff
+                , $1c2900
+                , $e6a100ff
+                , $1c3100
+                , $e6c200ff
+                , $1c3900
+                , $e6e200ff
+                , $1c4100
+                , $ee0000ff
+                , $1c4a00
+                , $ee2000ff
+                , $1c5200
+                , $ee4000ff
+                , $1c5a00
+                , $ee6100ff
+                , $1c6200
+                , $ee8100ff
+                , $1c6a00
+                , $eea100ff
+                , $1c7300
+                , $eec200ff
+                , $1c7b00
+                , $eee200ff
+                , $1c8300
+                , $f60000ff
+                , $1c8b00
+                , $f62000ff
+                , $1c9400
+                , $f64000ff
+                , $1c9c00
+                , $f66100ff
+                , $1ca400
+                , $f68100ff
+                , $1cac00
+                , $f6a100ff
+                , $1cb400
+                , $f6c200ff
+                , $1cbd00
+                , $f6e200ff
+                , $1cc500
+                , $ff0000ff
+                , $1ccd00
+                , $ff2000ff
+                , $1cd500
+                , $ff4000ff
+                , $1cde00
+                , $ff6100ff
+                , $1ce600
+                , $ff8100ff
+                , $1cee00
+                , $ffa100ff
+                , $1cf600
+                , $ffc200ff
+                , $1cff00
+                , $ffe200ff);
+
+  ConvertX86p16_32BGRA888_LUT_X86: array [0..511] of DWord = (
+                  $00000
+                , $000ff
+                , $8000000
+                , $2000ff
+                , $10000000
+                , $4000ff
+                , $18000000
+                , $6100ff
+                , $20000000
+                , $8100ff
+                , $29000000
+                , $a100ff
+                , $31000000
+                , $c200ff
+                , $39000000
+                , $e200ff
+                , $41000000
+                , $008ff
+                , $4a000000
+                , $2008ff
+                , $52000000
+                , $4008ff
+                , $5a000000
+                , $6108ff
+                , $62000000
+                , $8108ff
+                , $6a000000
+                , $a108ff
+                , $73000000
+                , $c208ff
+                , $7b000000
+                , $e208ff
+                , $83000000
+                , $010ff
+                , $8b000000
+                , $2010ff
+                , $94000000
+                , $4010ff
+                , $9c000000
+                , $6110ff
+                , $a4000000
+                , $8110ff
+                , $ac000000
+                , $a110ff
+                , $b4000000
+                , $c210ff
+                , $bd000000
+                , $e210ff
+                , $c5000000
+                , $018ff
+                , $cd000000
+                , $2018ff
+                , $d5000000
+                , $4018ff
+                , $de000000
+                , $6118ff
+                , $e6000000
+                , $8118ff
+                , $ee000000
+                , $a118ff
+                , $f6000000
+                , $c218ff
+                , $ff000000
+                , $e218ff
+                , $40000
+                , $020ff
+                , $8040000
+                , $2020ff
+                , $10040000
+                , $4020ff
+                , $18040000
+                , $6120ff
+                , $20040000
+                , $8120ff
+                , $29040000
+                , $a120ff
+                , $31040000
+                , $c220ff
+                , $39040000
+                , $e220ff
+                , $41040000
+                , $029ff
+                , $4a040000
+                , $2029ff
+                , $52040000
+                , $4029ff
+                , $5a040000
+                , $6129ff
+                , $62040000
+                , $8129ff
+                , $6a040000
+                , $a129ff
+                , $73040000
+                , $c229ff
+                , $7b040000
+                , $e229ff
+                , $83040000
+                , $031ff
+                , $8b040000
+                , $2031ff
+                , $94040000
+                , $4031ff
+                , $9c040000
+                , $6131ff
+                , $a4040000
+                , $8131ff
+                , $ac040000
+                , $a131ff
+                , $b4040000
+                , $c231ff
+                , $bd040000
+                , $e231ff
+                , $c5040000
+                , $039ff
+                , $cd040000
+                , $2039ff
+                , $d5040000
+                , $4039ff
+                , $de040000
+                , $6139ff
+                , $e6040000
+                , $8139ff
+                , $ee040000
+                , $a139ff
+                , $f6040000
+                , $c239ff
+                , $ff040000
+                , $e239ff
+                , $80000
+                , $041ff
+                , $8080000
+                , $2041ff
+                , $10080000
+                , $4041ff
+                , $18080000
+                , $6141ff
+                , $20080000
+                , $8141ff
+                , $29080000
+                , $a141ff
+                , $31080000
+                , $c241ff
+                , $39080000
+                , $e241ff
+                , $41080000
+                , $04aff
+                , $4a080000
+                , $204aff
+                , $52080000
+                , $404aff
+                , $5a080000
+                , $614aff
+                , $62080000
+                , $814aff
+                , $6a080000
+                , $a14aff
+                , $73080000
+                , $c24aff
+                , $7b080000
+                , $e24aff
+                , $83080000
+                , $052ff
+                , $8b080000
+                , $2052ff
+                , $94080000
+                , $4052ff
+                , $9c080000
+                , $6152ff
+                , $a4080000
+                , $8152ff
+                , $ac080000
+                , $a152ff
+                , $b4080000
+                , $c252ff
+                , $bd080000
+                , $e252ff
+                , $c5080000
+                , $05aff
+                , $cd080000
+                , $205aff
+                , $d5080000
+                , $405aff
+                , $de080000
+                , $615aff
+                , $e6080000
+                , $815aff
+                , $ee080000
+                , $a15aff
+                , $f6080000
+                , $c25aff
+                , $ff080000
+                , $e25aff
+                , $c0000
+                , $062ff
+                , $80c0000
+                , $2062ff
+                , $100c0000
+                , $4062ff
+                , $180c0000
+                , $6162ff
+                , $200c0000
+                , $8162ff
+                , $290c0000
+                , $a162ff
+                , $310c0000
+                , $c262ff
+                , $390c0000
+                , $e262ff
+                , $410c0000
+                , $06aff
+                , $4a0c0000
+                , $206aff
+                , $520c0000
+                , $406aff
+                , $5a0c0000
+                , $616aff
+                , $620c0000
+                , $816aff
+                , $6a0c0000
+                , $a16aff
+                , $730c0000
+                , $c26aff
+                , $7b0c0000
+                , $e26aff
+                , $830c0000
+                , $073ff
+                , $8b0c0000
+                , $2073ff
+                , $940c0000
+                , $4073ff
+                , $9c0c0000
+                , $6173ff
+                , $a40c0000
+                , $8173ff
+                , $ac0c0000
+                , $a173ff
+                , $b40c0000
+                , $c273ff
+                , $bd0c0000
+                , $e273ff
+                , $c50c0000
+                , $07bff
+                , $cd0c0000
+                , $207bff
+                , $d50c0000
+                , $407bff
+                , $de0c0000
+                , $617bff
+                , $e60c0000
+                , $817bff
+                , $ee0c0000
+                , $a17bff
+                , $f60c0000
+                , $c27bff
+                , $ff0c0000
+                , $e27bff
+                , $100000
+                , $083ff
+                , $8100000
+                , $2083ff
+                , $10100000
+                , $4083ff
+                , $18100000
+                , $6183ff
+                , $20100000
+                , $8183ff
+                , $29100000
+                , $a183ff
+                , $31100000
+                , $c283ff
+                , $39100000
+                , $e283ff
+                , $41100000
+                , $08bff
+                , $4a100000
+                , $208bff
+                , $52100000
+                , $408bff
+                , $5a100000
+                , $618bff
+                , $62100000
+                , $818bff
+                , $6a100000
+                , $a18bff
+                , $73100000
+                , $c28bff
+                , $7b100000
+                , $e28bff
+                , $83100000
+                , $094ff
+                , $8b100000
+                , $2094ff
+                , $94100000
+                , $4094ff
+                , $9c100000
+                , $6194ff
+                , $a4100000
+                , $8194ff
+                , $ac100000
+                , $a194ff
+                , $b4100000
+                , $c294ff
+                , $bd100000
+                , $e294ff
+                , $c5100000
+                , $09cff
+                , $cd100000
+                , $209cff
+                , $d5100000
+                , $409cff
+                , $de100000
+                , $619cff
+                , $e6100000
+                , $819cff
+                , $ee100000
+                , $a19cff
+                , $f6100000
+                , $c29cff
+                , $ff100000
+                , $e29cff
+                , $140000
+                , $0a4ff
+                , $8140000
+                , $20a4ff
+                , $10140000
+                , $40a4ff
+                , $18140000
+                , $61a4ff
+                , $20140000
+                , $81a4ff
+                , $29140000
+                , $a1a4ff
+                , $31140000
+                , $c2a4ff
+                , $39140000
+                , $e2a4ff
+                , $41140000
+                , $0acff
+                , $4a140000
+                , $20acff
+                , $52140000
+                , $40acff
+                , $5a140000
+                , $61acff
+                , $62140000
+                , $81acff
+                , $6a140000
+                , $a1acff
+                , $73140000
+                , $c2acff
+                , $7b140000
+                , $e2acff
+                , $83140000
+                , $0b4ff
+                , $8b140000
+                , $20b4ff
+                , $94140000
+                , $40b4ff
+                , $9c140000
+                , $61b4ff
+                , $a4140000
+                , $81b4ff
+                , $ac140000
+                , $a1b4ff
+                , $b4140000
+                , $c2b4ff
+                , $bd140000
+                , $e2b4ff
+                , $c5140000
+                , $0bdff
+                , $cd140000
+                , $20bdff
+                , $d5140000
+                , $40bdff
+                , $de140000
+                , $61bdff
+                , $e6140000
+                , $81bdff
+                , $ee140000
+                , $a1bdff
+                , $f6140000
+                , $c2bdff
+                , $ff140000
+                , $e2bdff
+                , $180000
+                , $0c5ff
+                , $8180000
+                , $20c5ff
+                , $10180000
+                , $40c5ff
+                , $18180000
+                , $61c5ff
+                , $20180000
+                , $81c5ff
+                , $29180000
+                , $a1c5ff
+                , $31180000
+                , $c2c5ff
+                , $39180000
+                , $e2c5ff
+                , $41180000
+                , $0cdff
+                , $4a180000
+                , $20cdff
+                , $52180000
+                , $40cdff
+                , $5a180000
+                , $61cdff
+                , $62180000
+                , $81cdff
+                , $6a180000
+                , $a1cdff
+                , $73180000
+                , $c2cdff
+                , $7b180000
+                , $e2cdff
+                , $83180000
+                , $0d5ff
+                , $8b180000
+                , $20d5ff
+                , $94180000
+                , $40d5ff
+                , $9c180000
+                , $61d5ff
+                , $a4180000
+                , $81d5ff
+                , $ac180000
+                , $a1d5ff
+                , $b4180000
+                , $c2d5ff
+                , $bd180000
+                , $e2d5ff
+                , $c5180000
+                , $0deff
+                , $cd180000
+                , $20deff
+                , $d5180000
+                , $40deff
+                , $de180000
+                , $61deff
+                , $e6180000
+                , $81deff
+                , $ee180000
+                , $a1deff
+                , $f6180000
+                , $c2deff
+                , $ff180000
+                , $e2deff
+                , $1c0000
+                , $0e6ff
+                , $81c0000
+                , $20e6ff
+                , $101c0000
+                , $40e6ff
+                , $181c0000
+                , $61e6ff
+                , $201c0000
+                , $81e6ff
+                , $291c0000
+                , $a1e6ff
+                , $311c0000
+                , $c2e6ff
+                , $391c0000
+                , $e2e6ff
+                , $411c0000
+                , $0eeff
+                , $4a1c0000
+                , $20eeff
+                , $521c0000
+                , $40eeff
+                , $5a1c0000
+                , $61eeff
+                , $621c0000
+                , $81eeff
+                , $6a1c0000
+                , $a1eeff
+                , $731c0000
+                , $c2eeff
+                , $7b1c0000
+                , $e2eeff
+                , $831c0000
+                , $0f6ff
+                , $8b1c0000
+                , $20f6ff
+                , $941c0000
+                , $40f6ff
+                , $9c1c0000
+                , $61f6ff
+                , $a41c0000
+                , $81f6ff
+                , $ac1c0000
+                , $a1f6ff
+                , $b41c0000
+                , $c2f6ff
+                , $bd1c0000
+                , $e2f6ff
+                , $c51c0000
+                , $0ffff
+                , $cd1c0000
+                , $20ffff
+                , $d51c0000
+                , $40ffff
+                , $de1c0000
+                , $61ffff
+                , $e61c0000
+                , $81ffff
+                , $ee1c0000
+                , $a1ffff
+                , $f61c0000
+                , $c2ffff
+                , $ff1c0000
+                , $e2ffff);
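
Note: the LUT above (and the ones before it) is stored as interleaved pairs:
entry 2*i is the partial 32-bit pixel contributed by a low source byte of
value i, and entry 2*i+1 the contribution of a high source byte, which is why
the converters below address it as LUT(,reg,8) and LUT+4(,reg,8). A minimal
Pascal sketch of the same lookup, assuming the table is in scope (the function
name is illustrative):

    function Convert16To32ViaLUT(pix: Word): DWord;
    begin
      { sum the partial pixels contributed by the low and high source bytes }
      Result := ConvertX86p16_32BGRA888_LUT_X86[2 * (pix and $ff)]
              + ConvertX86p16_32BGRA888_LUT_X86[2 * (pix shr 8) + 1];
    end;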

+ 0 - 297
packages/hermes/src/i386/x86_clr.as

@@ -1,297 +0,0 @@
-#
-# x86 surface clear routines for HERMES
-# Copyright (c) 1998 Christian Nentwich ([email protected])
-# This source code is licensed under the GNU LGPL
-#
-# Please refer to the file COPYING.LIB contained in the distribution for
-# licensing conditions
-#
-# (04/10/99)    Modified ClearX86_8             <[email protected]>
-
-
-
-
-.globl _ClearX86_32
-.globl _ClearX86_24
-.globl _ClearX86_16
-.globl _ClearX86_8
-
-.text
-
-##
-## --------------------------------------------------------------------------
-## HermesClearInterface (ebp+..)
-##   0: char8 *dest
-##   4: int32 value
-##   8: unsigned int width (already checked to be >0!)
-##  12: unsigned int height (already checked to be >0!)
-##  16: int add
-
-.align 8
-_ClearX86_32:
-        pushl %ebp
-        movl %esp,%ebp
-
-        movl 8(%ebp),%ebp
-
-        movl (%ebp),%edi        # destination
-        movl 4(%ebp),%eax       # pixel value
-
-        movl 12(%ebp),%edx      # height
-.align 4
-_ClearX86_32.L_y:
-        movl 8(%ebp),%ecx
-        rep
- stosl
-
-        addl 16(%ebp),%edi
-
-        decl %edx
-        jnz _ClearX86_32.L_y
-
-        popl %ebp
-        ret
-
-
-
-_ClearX86_24:
-        ret
-
-
-
-.align 8
-_ClearX86_16:
-        pushl %ebp
-        movl %esp,%ebp
-
-        movl 8(%ebp),%ebp
-
-        movl (%ebp),%edi        # destination
-        movl 4(%ebp),%eax       # pixel value
-
-        movl 12(%ebp),%edx      # height
-        movl %eax,%ebx
-
-        shll $16,%eax           # Duplicate pixel value
-        andl $0x0ffff,%ebx
-
-        orl %ebx,%eax
-_ClearX86_16.L_y:
-        movl 8(%ebp),%ecx
-
-        testl $3,%edi           # Check if destination is aligned mod 4
-        jz _ClearX86_16.L_aligned
-
-        movw %ax,(%edi)         # otherwise write one pixel
-        addl $2,%edi
-
-        decl %ecx
-        jz _ClearX86_16.L_endline
-
-_ClearX86_16.L_aligned:
-        shrl %ecx
-
-rep
- stosl
-
-        jnc _ClearX86_16.L_endline
-
-        movw %ax,(%edi)
-        addl $2,%edi
-
-_ClearX86_16.L_endline:
-        addl 16(%ebp),%edi
-
-        decl %edx
-        jnz _ClearX86_16.L_y
-
-        popl %ebp
-        ret
-
-
-
-.align 8
-_ClearX86_8:
-        pushl %ebp
-        movl %esp,%ebp
-
-        movl 8(%ebp),%ebp
-
-        movl 4(%ebp),%eax       # pixel value
-        movl 12(%ebp),%edx      # height
-
-        movb %al,%ah
-        movl (%ebp),%edi        # destination
-
-        movl %eax,%ecx
-
-        shll $16,%eax           # Put the byte pixel value in all four bytes
-        andl $0x0ffff,%ecx      # of eax
-
-        movl 8(%ebp),%ebx
-        orl %ecx,%eax
-
-        cmpl $5,%ebx            # removes need for extra checks later
-        jbe _ClearX86_8.L_short_y
-
-.align 4
-_ClearX86_8.L_y:
-        testl $3,%edi
-        jz _ClearX86_8.L_aligned
-
-        movl %edi,%ecx
-        negl %ecx
-        andl $3,%ecx
-
-        subl %ecx,%ebx
-
-        rep
- stosb
-
-_ClearX86_8.L_aligned:
-        movl %ebx,%ecx
-
-        shrl $2,%ecx
-        andl $3,%ebx
-
-        rep
- stosl
-
-        movl %ebx,%ecx
-        rep
- stosb
-
-        addl 16(%ebp),%edi
-
-        decl %edx
-        movl 8(%ebp),%ebx
-        jnz _ClearX86_8.L_y
-
-        popl %ebp
-        ret
-
-## Short loop
-.align 4
-_ClearX86_8.L_short_y:
-        movl %ebx,%ecx
-
-        rep
- stosb
-        addl 16(%ebp),%edi
-
-        decl %edx
-        jnz _ClearX86_8.L_short_y
-
-        popl %ebp
-        ret
-
-## ClearX86_8 version 2,
-## I'm not sure whether this is faster or not...
-## too many jumps could confuse the CPU branch prediction
-
-
-.align 8
-_ClearX86_8_2:
-        pushl %ebp
-        movl %esp,%ebp
-
-        movl 8(%ebp),%ebp
-
-        movl 4(%ebp),%eax       # pixel value
-        movl 12(%ebp),%edx      # height
-
-        movb %al,%ah
-        movl (%ebp),%edi        # destination
-
-        movl %eax,%ecx
-
-        shll $16,%eax           # Put the byte pixel value in all four bytes
-        andl $0x0ffff,%ecx      # of eax
-
-        movl 8(%ebp),%ebx
-        orl %ecx,%eax
-
-        cmpl $5,%ebx            # removes need for extra checks in main loop
-        jbe _ClearX86_8_2.L_short_y
-
-
-.align 4
-_ClearX86_8_2.L_y:
-        testl $3,%edi
-        jz _ClearX86_8_2.L_aligned
-
-        movl %edi,%ecx
-        negl %ecx
-        andl $3,%ecx
-
-        movb %al,(%edi)
-        subl %ecx,%ebx
-
-        incl %edi
-
-        decl %ecx
-        jz _ClearX86_8_2.L_aligned
-
-        movb %al,(%edi)
-        incl %edi
-        decl %ecx
-        jz _ClearX86_8_2.L_aligned
-
-        movb %al,(%edi)
-        incl %edi
-
-_ClearX86_8_2.L_aligned:
-        movl %ebx,%ecx
-
-        shrl $2,%ecx
-        andl $3,%ebx
-
-        rep
- stosl
-
-        jz _ClearX86_8_2.L_endline
-                # ebx
-
-        movb %al,(%edi)
-                # Write remaining (1,2 or 3) pixels
-        incl %edi
-        decl %ebx
-        jz _ClearX86_8_2.L_endline
-
-        movb %al,(%edi)
-        incl %edi
-        decl %ebx
-        jz _ClearX86_8_2.L_endline
-
-        movb %al,(%edi)
-        incl %edi
-        decl %ebx
-        jz _ClearX86_8_2.L_endline
-
-        movb %al,(%edi)
-        incl %edi
-
-_ClearX86_8_2.L_endline:
-        addl 16(%ebp),%edi
-
-        decl %edx
-        movl 8(%ebp),%ebx
-        jnz _ClearX86_8_2.L_y
-
-        popl %ebp
-        ret
-
-## Short loop
-.align 4
-_ClearX86_8_2.L_short_y:
-        movl %ebx,%ecx
-
-        rep
- stosb
-        addl 16(%ebp),%edi
-
-        decl %edx
-        jnz _ClearX86_8_2.L_short_y
-
-        popl %ebp
-        ret

+ 305 - 0
packages/hermes/src/i386/x86_clr.inc

@@ -0,0 +1,305 @@
+{
+    x86 surface clear routines for HERMES
+    Copyright (c) 1998 Christian Nentwich ([email protected])
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version
+    with the following modification:
+
+    As a special exception, the copyright holders of this library give you
+    permission to link this library with independent modules to produce an
+    executable, regardless of the license terms of these independent modules, and
+    to copy and distribute the resulting executable under terms of your choice,
+    provided that you also meet, for each linked independent module, the terms
+    and conditions of the license of that module. An independent module is a
+    module which is not derived from or based on this library. If you modify
+    this library, you may extend this exception to your version of the library,
+    but you are not obligated to do so. If you do not wish to do so, delete this
+    exception statement from your version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+    (04/10/99)    Modified ClearX86_8             <[email protected]>
+}
+
+{$ASMMODE att}
+
+{
+ --------------------------------------------------------------------------
+ HermesClearInterface (ebp+..)
+   0: char8 *dest
+   4: int32 value
+   8: unsigned int width (already checked to be >0!)
+  12: unsigned int height (already checked to be >0!)
+  16: int add
+}
+procedure ClearX86_32(hci: PHermesClearInterface); cdecl; assembler;
+asm
+        pushl %ebp
+
+        movl 8(%ebp),%ebp
+
+        movl (%ebp),%edi        // destination
+        movl 4(%ebp),%eax       // pixel value
+
+        movl 12(%ebp),%edx      // height
+.balign 4
+.L_y:
+        movl 8(%ebp),%ecx
+        rep
+ stosl
+
+        addl 16(%ebp),%edi
+
+        decl %edx
+        jnz .L_y
+
+        popl %ebp
+end;
+
+procedure ClearX86_24(hci: PHermesClearInterface); cdecl; assembler;
+asm
+        // 24 bpp clears are not implemented; the replaced x86_clr.as routine was a bare ret
+end;
+
+procedure ClearX86_16(hci: PHermesClearInterface); cdecl; assembler;
+asm
+        pushl %ebp
+
+        movl 8(%ebp),%ebp
+
+        movl (%ebp),%edi        // destination
+        movl 4(%ebp),%eax       // pixel value
+
+        movl 12(%ebp),%edx      // height
+        movl %eax,%ebx
+
+        shll $16,%eax           // Duplicate pixel value
+        andl $0x0ffff,%ebx
+
+        orl %ebx,%eax
+.L_y:
+        movl 8(%ebp),%ecx
+
+        testl $3,%edi           // Check if destination is aligned mod 4
+        jz .L_aligned
+
+        movw %ax,(%edi)         // otherwise write one pixel
+        addl $2,%edi
+
+        decl %ecx
+        jz .L_endline
+
+.L_aligned:
+        shrl $1,%ecx
+
+rep
+ stosl
+
+        jnc .L_endline
+
+        movw %ax,(%edi)
+        addl $2,%edi
+
+.L_endline:
+        addl 16(%ebp),%edi
+
+        decl %edx
+        jnz .L_y
+
+        popl %ebp
+end;
+
+procedure ClearX86_8(hci: PHermesClearInterface); cdecl; nostackframe; assembler;
+asm
+        pushl %ebp
+        movl %esp,%ebp
+
+        movl 8(%ebp),%ebp
+
+        movl 4(%ebp),%eax       // pixel value
+        movl 12(%ebp),%edx      // height
+
+        movb %al,%ah
+        movl (%ebp),%edi        // destination
+
+        movl %eax,%ecx
+
+        shll $16,%eax           // Put the byte pixel value in all four bytes
+        andl $0x0ffff,%ecx      // of eax
+
+        movl 8(%ebp),%ebx
+        orl %ecx,%eax
+
+        cmpl $5,%ebx            // removes need for extra checks later
+        jbe .L_short_y
+
+.balign 4
+.L_y:
+        testl $3,%edi
+        jz .L_aligned
+
+        movl %edi,%ecx
+        negl %ecx
+        andl $3,%ecx
+
+        subl %ecx,%ebx
+
+        rep
+ stosb
+
+.L_aligned:
+        movl %ebx,%ecx
+
+        shrl $2,%ecx
+        andl $3,%ebx
+
+        rep
+ stosl
+
+        movl %ebx,%ecx
+        rep
+ stosb
+
+        addl 16(%ebp),%edi
+
+        decl %edx
+        movl 8(%ebp),%ebx
+        jnz .L_y
+
+        popl %ebp
+        ret
+
+// Short loop
+.balign 4
+.L_short_y:
+        movl %ebx,%ecx
+
+        rep
+ stosb
+        addl 16(%ebp),%edi
+
+        decl %edx
+        jnz .L_short_y
+
+        popl %ebp
+end;
+
+{
+ ClearX86_8 version 2,
+ I'm not sure whether this is faster or not...
+ too many jumps could confuse the CPU branch prediction
+}
+procedure ClearX86_8_2(hci: PHermesClearInterface); cdecl; nostackframe; assembler;
+asm
+        pushl %ebp
+        movl %esp,%ebp
+
+        movl 8(%ebp),%ebp
+
+        movl 4(%ebp),%eax       // pixel value
+        movl 12(%ebp),%edx      // height
+
+        movb %al,%ah
+        movl (%ebp),%edi        // destination
+
+        movl %eax,%ecx
+
+        shll $16,%eax           // Put the byte pixel value in all four bytes
+        andl $0x0ffff,%ecx      // of eax
+
+        movl 8(%ebp),%ebx
+        orl %ecx,%eax
+
+        cmpl $5,%ebx            // removes need for extra checks in main loop
+        jbe .L_short_y
+
+
+.balign 4
+.L_y:
+        testl $3,%edi
+        jz .L_aligned
+
+        movl %edi,%ecx
+        negl %ecx
+        andl $3,%ecx
+
+        movb %al,(%edi)
+        subl %ecx,%ebx
+
+        incl %edi
+
+        decl %ecx
+        jz .L_aligned
+
+        movb %al,(%edi)
+        incl %edi
+        decl %ecx
+        jz .L_aligned
+
+        movb %al,(%edi)
+        incl %edi
+
+.L_aligned:
+        movl %ebx,%ecx
+
+        shrl $2,%ecx
+        andl $3,%ebx
+
+        rep
+ stosl
+
+        jz .L_endline           // no remainder left in ebx
+
+        movb %al,(%edi)         // Write remaining (1, 2 or 3) pixels
+        incl %edi
+        decl %ebx
+        jz .L_endline
+
+        movb %al,(%edi)
+        incl %edi
+        decl %ebx
+        jz .L_endline
+
+        movb %al,(%edi)
+        incl %edi
+        decl %ebx
+        jz .L_endline
+
+        movb %al,(%edi)
+        incl %edi
+
+.L_endline:
+        addl 16(%ebp),%edi
+
+        decl %edx
+        movl 8(%ebp),%ebx
+        jnz .L_y
+
+        popl %ebp
+        ret
+
+// Short loop
+.balign 4
+.L_short_y:
+        movl %ebx,%ecx
+
+        rep
+ stosb
+        addl 16(%ebp),%edi
+
+        decl %edx
+        jnz .L_short_y
+
+        popl %ebp
+end;
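
Note: the HermesClearInterface offsets documented at the top of this file
(0/4/8/12/16) correspond to a record of roughly the following shape. This is a
reconstruction from the offset comments, not the actual declaration from the
Hermes headers:

    type
      THermesClearInterface = record
        dest: PByte;      { 0: char8 *dest }
        value: DWord;     { 4: int32 value }
        width: DWord;     { 8: unsigned int width, > 0 }
        height: DWord;    { 12: unsigned int height, > 0 }
        add: LongInt;     { 16: bytes to skip after each row }
      end;
      PHermesClearInterface = ^THermesClearInterface;

ClearX86_16 also relies on a small flags trick: shrl $1,%ecx halves the pixel
count for rep stosl, and because rep stosl does not modify the flags, the jnc
that follows still sees the carry bit shifted out and writes the odd trailing
pixel only when one remains.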

+ 0 - 182
packages/hermes/src/i386/x86_main.as

@@ -1,182 +0,0 @@
-#
-# x86 format converters for HERMES
-# Some routines Copyright (c) 1998 Christian Nentwich ([email protected])
-# This source code is licensed under the GNU LGPL
-#
-# Please refer to the file COPYING.LIB contained in the distribution for
-# licensing conditions
-#
-
-
-
-.globl _ConvertX86
-.globl _ConvertX86Stretch
-.globl _x86return
-.globl _x86return_S
-
-.globl _Hermes_X86_CPU
-
-
-.data
-
-cpu_flags: .long 0
-
-.text
-
-.equ s_pixels, 0
-.equ s_width, 4
-.equ s_height, 8
-.equ s_add, 12
-.equ d_pixels, 16
-.equ d_width, 20
-.equ d_height, 24
-.equ d_add, 28
-.equ conv_func, 32
-.equ lookup, 36
-.equ s_pitch, 40
-.equ d_pitch, 44
-
-## _ConvertX86:
-## [ESP+8] ConverterInfo*
-## --------------------------------------------------------------------------
-##
-## ConvertX86Stretch 'abuses' the following info structure fields:
-##      - d_pitch for the y increment
-##      - s_add for the x increment
-## because they're unused anyway and this is thread safe.. (it's a per
-## converter handle structure)
-_ConvertX86Stretch:
-        pushl %ebp
-        movl %esp,%ebp
-
-        movl 8(%ebp),%ebp
-
-        movl s_height(%ebp),%eax
-        sall $16,%eax
-        cdq
-        idivl d_height(%ebp)
-        movl %eax,d_pitch(%ebp)
-
-        movl s_width(%ebp),%eax
-        sall $16,%eax
-        cdq
-        idivl d_width(%ebp)
-        movl %eax,s_add(%ebp)
-
-        movl $0,s_height(%ebp)
-
-        movl d_pixels(%ebp),%edi
-        movl s_pixels(%ebp),%esi
-
-        movl s_add(%ebp),%edx
-        movl d_width(%ebp),%ecx
-        jmp *conv_func(%ebp)
-
-.align 8
-_x86return_S:
-
-        decl d_height(%ebp)
-        jz endconvert_S
-
-        movl s_height(%ebp),%eax
-        addl d_add(%ebp),%edi
-
-        addl d_pitch(%ebp),%eax
-
-        movl %eax,%ebx
-
-        shrl $16,%eax
-
-        mull s_pitch(%ebp)
-        andl $0x0ffff,%ebx
-
-        movl %ebx,s_height(%ebp)
-        addl %eax,%esi
-
-        movl s_add(%ebp),%edx
-        movl d_width(%ebp),%ecx
-
-        jmp *conv_func(%ebp)
-
-endconvert_S:
-
-        popl %ebp
-        ret
-
-
-
-_ConvertX86:
-        pushl %ebp
-        movl %esp,%ebp
-
-        movl 8(%ebp),%ebp
-
-        movl s_pixels(%ebp),%esi
-        movl d_width(%ebp),%ecx
-        movl d_pixels(%ebp),%edi
-
-        jmp *32(%ebp)
-
-.align 8
-_x86return:
-        decl s_height(%ebp)
-        jz endconvert
-
-        movl d_width(%ebp),%ecx
-        addl s_add(%ebp),%esi
-        addl d_add(%ebp),%edi
-
-        jmp *32(%ebp)
-
-
-endconvert:
-        popl %ebp
-        ret
-
-
-
-## Hermes_X86_CPU returns the CPUID flags in eax
-
-_Hermes_X86_CPU:
-        pushfl
-        popl %eax
-
-        movl %eax,%ecx
-
-        xorl $0x040000,%eax
-        pushl %eax
-
-        popfl
-        pushfl
-
-        popl %eax
-        xorl %ecx,%eax
-        jz _Hermes_X86_CPU.L1   # Processor is 386
-
-        pushl %ecx
-        popfl
-
-        movl %ecx,%eax
-        xorl $0x200000,%eax
-
-        pushl %eax
-        popfl
-        pushfl
-
-        popl %eax
-        xorl %ecx,%eax
-        je _Hermes_X86_CPU.L1
-
-        pusha
-
-        movl $1,%eax
-        cpuid
-
-        movl %edx,cpu_flags
-
-        popa
-
-        movl cpu_flags,%eax
-
-_Hermes_X86_CPU.L1:
-        ret
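
Note: the .equ offsets in the deleted file above (s_pixels=0 .. d_pitch=44)
are what the Pascal replacement below references as bare ebp displacements
with trailing comments. On i386 they line up with a record of this shape,
reconstructed here for reference only:

    type
      THermesConverterInterface = record
        s_pixels: PByte;       { 0 }
        s_width: DWord;        { 4 }
        s_height: DWord;       { 8 }
        s_add: LongInt;        { 12 }
        d_pixels: PByte;       { 16 }
        d_width: DWord;        { 20 }
        d_height: DWord;       { 24 }
        d_add: LongInt;        { 28 }
        conv_func: Pointer;    { 32 }
        lookup: Pointer;       { 36 }
        s_pitch: LongInt;      { 40 }
        d_pitch: LongInt;      { 44 }
      end;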

+ 182 - 0
packages/hermes/src/i386/x86_main.inc

@@ -0,0 +1,182 @@
+{
+    x86 format converters for HERMES
+    Some routines Copyright (c) 1998 Christian Nentwich ([email protected])
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version
+    with the following modification:
+
+    As a special exception, the copyright holders of this library give you
+    permission to link this library with independent modules to produce an
+    executable, regardless of the license terms of these independent modules, and
+    to copy and distribute the resulting executable under terms of your choice,
+    provided that you also meet, for each linked independent module, the terms
+    and conditions of the license of that module. An independent module is a
+    module which is not derived from or based on this library. If you modify
+    this library, you may extend this exception to your version of the library,
+    but you are not obligated to do so. If you do not wish to do so, delete this
+    exception statement from your version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+}
+
+label
+  _x86return, _x86return_S;
+
+var
+  cpu_flags: DWord = 0;
+
+{ _ConvertX86:
+ [ESP+8] ConverterInfo*
+ --------------------------------------------------------------------------
+
+ ConvertX86Stretch 'abuses' the following info structure fields:
+      - d_pitch for the y increment
+      - s_add for the x increment
+ because they're unused anyway and this is thread-safe (it's a per
+ converter handle structure)
+}
+procedure ConvertX86Stretch(hci: PHermesConverterInterface); cdecl; assembler;
+label
+  endconvert_S;
+asm
+        pushl %ebp
+
+        movl 8(%ebp),%ebp
+
+        movl 8(%ebp),%eax  // s_height
+        sall $16,%eax
+        cltd
+        idivl 24(%ebp)  // d_height
+        movl %eax,44(%ebp)  // d_pitch
+
+        movl 4(%ebp),%eax  // s_width
+        sall $16,%eax
+        cltd
+        idivl 20(%ebp)  // d_width
+        movl %eax,12(%ebp)  // s_add
+
+        movl $0,8(%ebp)  // s_height
+
+        movl 16(%ebp),%edi  // d_pixels
+        movl 0(%ebp),%esi  // s_pixels
+
+        movl 12(%ebp),%edx  // s_add
+        movl 20(%ebp),%ecx  // d_width
+        jmp *32(%ebp)  // conv_func
+
+.balign 8
+_x86return_S:
+
+        decl 24(%ebp)  // d_height
+        jz endconvert_S
+
+        movl 8(%ebp),%eax  // s_height
+        addl 28(%ebp),%edi  // d_add
+
+        addl 44(%ebp),%eax  // d_pitch
+
+        movl %eax,%ebx
+
+        shrl $16,%eax
+
+        mull 40(%ebp)  // s_pitch
+        andl $0x0ffff,%ebx
+
+        movl %ebx,8(%ebp)  // s_height
+        addl %eax,%esi
+
+        movl 12(%ebp),%edx  // s_add
+        movl 20(%ebp),%ecx  // d_width
+
+        jmp *32(%ebp)  // conv_func
+
+endconvert_S:
+
+        popl %ebp
+end;
+
+procedure ConvertX86(hci: PHermesConverterInterface); cdecl; assembler;
+label
+  endconvert;
+asm
+        pushl %ebp
+
+        movl 8(%ebp),%ebp
+
+        movl 0(%ebp),%esi  // s_pixels
+        movl 20(%ebp),%ecx  // d_width
+        movl 16(%ebp),%edi  // d_pixels
+
+        jmp *32(%ebp)
+
+.balign 8
+_x86return:
+        decl 8(%ebp)  // s_height
+        jz endconvert
+
+        movl 20(%ebp),%ecx  // d_width
+        addl 12(%ebp),%esi  // s_add
+        addl 28(%ebp),%edi  // d_add
+
+        jmp *32(%ebp)
+
+
+endconvert:
+        popl %ebp
+end;
+
+// Hermes_X86_CPU returns the CPUID flags in eax
+function Hermes_X86_CPU: Integer; cdecl; assembler;
+asm
+        pushfl
+        popl %eax
+
+        movl %eax,%ecx
+
+        xorl $0x040000,%eax
+        pushl %eax
+
+        popfl
+        pushfl
+
+        popl %eax
+        xorl %ecx,%eax
+        jz .L1   // Processor is 386
+
+        pushl %ecx
+        popfl
+
+        movl %ecx,%eax
+        xorl $0x200000,%eax
+
+        pushl %eax
+        popfl
+        pushfl
+
+        popl %eax
+        xorl %ecx,%eax
+        je .L1   // CPUID not supported
+
+        pusha
+
+        movl $1,%eax
+        cpuid
+
+        movl %edx,cpu_flags
+
+        popa
+
+        movl cpu_flags,%eax
+
+.L1:
+end;
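
Note: Hermes_X86_CPU toggles EFLAGS bit 18 (0x040000, the AC flag) to rule out
a 386, then bit 21 (0x200000, the ID flag) to verify that CPUID exists, and
finally returns the EDX feature flags of CPUID leaf 1 (0 on pre-CPUID
processors). A sketch of how a caller might test the result; the bit position
is the standard CPUID.01h:EDX assignment (bit 23 = MMX), while the constant
name is illustrative:

    const
      CPUID_EDX_MMX = 1 shl 23;   { CPUID.01h:EDX, bit 23 }
    var
      flags: Integer;
    begin
      flags := Hermes_X86_CPU;
      if (flags and CPUID_EDX_MMX) <> 0 then
        WriteLn('MMX present; the mmx_*.inc converters are usable');
    end;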

+ 0 - 1154
packages/hermes/src/i386/x86p_16.as

@@ -1,1154 +0,0 @@
-#
-# x86 format converters for HERMES
-# Copyright (c) 1998 Glenn Fielder ([email protected])
-# This source code is licensed under the GNU LGPL
-#
-# Please refer to the file COPYING.LIB contained in the distribution for
-# licensing conditions
-#
-# Routines adjusted for Hermes by Christian Nentwich ([email protected])
-# Used with permission.
-#
-
-
-#BITS 32
-
-.globl _ConvertX86p16_32RGB888
-.globl _ConvertX86p16_32BGR888
-.globl _ConvertX86p16_32RGBA888
-.globl _ConvertX86p16_32BGRA888
-.globl _ConvertX86p16_24RGB888
-.globl _ConvertX86p16_24BGR888
-.globl _ConvertX86p16_16BGR565
-.globl _ConvertX86p16_16RGB555
-.globl _ConvertX86p16_16BGR555
-.globl _ConvertX86p16_8RGB332
-
-.extern _ConvertX86
-.extern _x86return
-
-.globl _ConvertX86p16_32RGB888_LUT_X86
-.globl _ConvertX86p16_32BGR888_LUT_X86
-.globl _ConvertX86p16_32RGBA888_LUT_X86
-.globl _ConvertX86p16_32BGRA888_LUT_X86
-
-.include "src/i386/x8616lut.as"
-
-.text
-
-
-
-_ConvertX86p16_32RGB888:
-
-    # check short
-    cmpl $32,%ecx
-    ja _ConvertX86p16_32RGB888.L3
-
-
-    # short loop
-    xorl %ebx,%ebx
-_ConvertX86p16_32RGB888.L1: movb (%esi),%bl             # ebx = lower byte pixel 1
-    movl _ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%eax  # eax = ARGB8888 of lower byte pixel 1
-    movb 1(%esi),%bl                                    # ebx = upper byte pixel 1
-    movl _ConvertX86p16_32RGB888_LUT_X86+4(,%ebx,8),%edx# edx = ARGB8888 of upper byte pixel 1
-    addl %edx,%eax
-    movl %eax,(%edi)
-    addl $2,%esi
-    addl $4,%edi
-    decl %ecx
-    jnz _ConvertX86p16_32RGB888.L1
-_ConvertX86p16_32RGB888.L2:
-    jmp _x86return
-
-
-_ConvertX86p16_32RGB888.L3:  # save ebp
-    pushl %ebp
-
-    # save count
-    pushl %ecx
-
-    # unroll twice
-    movl %ecx,%ebp
-    shrl %ebp
-
-    # point arrays to end
-    leal (%esi,%ebp,4),%esi
-    leal (%edi,%ebp,8),%edi
-
-    # negative counter
-    negl %ebp
-
-    # clear
-    xorl %ebx,%ebx
-    xorl %ecx,%ecx
-
-    # prestep
-    movb (%esi,%ebp,4),%cl
-    movb 1(%esi,%ebp,4),%bl
-
-_ConvertX86p16_32RGB888.L4: movl _ConvertX86p16_32RGB888_LUT_X86(,%ecx,8),%edx
-        movb 2(%esi,%ebp,4),%cl
-
-        movl _ConvertX86p16_32RGB888_LUT_X86+4(,%ebx,8),%eax
-        movb 3(%esi,%ebp,4),%bl
-
-        addl %edx,%eax
-        movl _ConvertX86p16_32RGB888_LUT_X86(,%ecx,8),%edx
-
-        movl %eax,(%edi,%ebp,8)
-        movl _ConvertX86p16_32RGB888_LUT_X86+4(,%ebx,8),%eax
-
-        addl %edx,%eax
-        movb 4(%esi,%ebp,4),%cl
-
-        movl %eax,4(%edi,%ebp,8)
-        movb 5(%esi,%ebp,4),%bl
-
-        incl %ebp
-        jnz _ConvertX86p16_32RGB888.L4
-
-    # tail
-    popl %ecx
-    andl $1,%ecx
-    jz _ConvertX86p16_32RGB888.L6
-    xorl %ebx,%ebx
-    movb (%esi),%bl                                     # ebx = lower byte pixel 1
-    movl _ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%eax  # eax = ARGB8888 of lower byte pixel 1
-    movb 1(%esi),%bl                                    # ebx = upper byte pixel 1
-    movl _ConvertX86p16_32RGB888_LUT_X86+4(,%ebx,8),%edx# edx = ARGB8888 of upper byte pixel 1
-    addl %edx,%eax
-    movl %eax,(%edi)
-    addl $2,%esi
-    addl $4,%edi
-
-_ConvertX86p16_32RGB888.L6: popl %ebp
-    jmp _x86return
-
-
-
-
-
-
-_ConvertX86p16_32BGR888:
-
-    # check short
-    cmpl $32,%ecx
-    ja _ConvertX86p16_32BGR888.L3
-
-
-    # short loop
-    xorl %ebx,%ebx
-_ConvertX86p16_32BGR888.L1: movb (%esi),%bl             # ebx = lower byte pixel 1
-    movl _ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%eax  # eax = ABGR8888 of lower byte pixel 1
-    movb 1(%esi),%bl                                    # ebx = upper byte pixel 1
-    movl _ConvertX86p16_32BGR888_LUT_X86+4(,%ebx,8),%edx# edx = ABGR8888 of upper byte pixel 1
-    addl %edx,%eax
-    movl %eax,(%edi)
-    addl $2,%esi
-    addl $4,%edi
-    decl %ecx
-    jnz _ConvertX86p16_32BGR888.L1
-_ConvertX86p16_32BGR888.L2:
-    jmp _x86return
-
-_ConvertX86p16_32BGR888.L3:  # save ebp
-    pushl %ebp
-
-    # save count
-    pushl %ecx
-
-    # unroll twice
-    movl %ecx,%ebp
-    shrl %ebp
-
-    # point arrays to end
-    leal (%esi,%ebp,4),%esi
-    leal (%edi,%ebp,8),%edi
-
-    # negative counter
-    negl %ebp
-
-    # clear
-    xorl %ebx,%ebx
-    xorl %ecx,%ecx
-
-    # prestep
-    movb (%esi,%ebp,4),%cl
-    movb 1(%esi,%ebp,4),%bl
-
-_ConvertX86p16_32BGR888.L4: movl _ConvertX86p16_32BGR888_LUT_X86(,%ecx,8),%edx
-        movb 2(%esi,%ebp,4),%cl
-
-        movl _ConvertX86p16_32BGR888_LUT_X86+4(,%ebx,8),%eax
-        movb 3(%esi,%ebp,4),%bl
-
-        addl %edx,%eax
-        movl _ConvertX86p16_32BGR888_LUT_X86(,%ecx,8),%edx
-
-        movl %eax,(%edi,%ebp,8)
-        movl _ConvertX86p16_32BGR888_LUT_X86+4(,%ebx,8),%eax
-
-        addl %edx,%eax
-        movb 4(%esi,%ebp,4),%cl
-
-        movl %eax,4(%edi,%ebp,8)
-        movb 5(%esi,%ebp,4),%bl
-
-        incl %ebp
-        jnz _ConvertX86p16_32BGR888.L4
-
-    # tail
-    popl %ecx
-    andl $1,%ecx
-    jz _ConvertX86p16_32BGR888.L6
-    xorl %ebx,%ebx
-    movb (%esi),%bl                                     # ebx = lower byte pixel 1
-    movl _ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%eax  # eax = ABGR8888 of lower byte pixel 1
-    movb 1(%esi),%bl                                    # ebx = upper byte pixel 1
-    movl _ConvertX86p16_32BGR888_LUT_X86+4(,%ebx,8),%edx# edx = ABGR8888 of upper byte pixel 1
-    addl %edx,%eax
-    movl %eax,(%edi)
-    addl $2,%esi
-    addl $4,%edi
-
-_ConvertX86p16_32BGR888.L6: popl %ebp
-    jmp _x86return
-
-
-
-
-
-
-_ConvertX86p16_32RGBA888:
-
-    # check short
-    cmpl $32,%ecx
-    ja _ConvertX86p16_32RGBA888.L3
-
-
-    # short loop
-    xorl %ebx,%ebx
-_ConvertX86p16_32RGBA888.L1: movb (%esi),%bl             # ebx = lower byte pixel 1
-    movl _ConvertX86p16_32RGBA888_LUT_X86(,%ebx,8),%eax  # eax = RGBA8888 of lower byte pixel 1
-    movb 1(%esi),%bl                                     # ebx = upper byte pixel 1
-    movl _ConvertX86p16_32RGBA888_LUT_X86+4(,%ebx,8),%edx# edx = RGBA8888 of upper byte pixel 1
-    addl %edx,%eax
-    movl %eax,(%edi)
-    addl $2,%esi
-    addl $4,%edi
-    decl %ecx
-    jnz _ConvertX86p16_32RGBA888.L1
-_ConvertX86p16_32RGBA888.L2:
-    jmp _x86return
-
-_ConvertX86p16_32RGBA888.L3:  # save ebp
-    pushl %ebp
-
-    # save count
-    pushl %ecx
-
-    # unroll twice
-    movl %ecx,%ebp
-    shrl %ebp
-
-    # point arrays to end
-    leal (%esi,%ebp,4),%esi
-    leal (%edi,%ebp,8),%edi
-
-    # negative counter
-    negl %ebp
-
-    # clear
-    xorl %ebx,%ebx
-    xorl %ecx,%ecx
-
-    # prestep
-    movb (%esi,%ebp,4),%cl
-    movb 1(%esi,%ebp,4),%bl
-
-_ConvertX86p16_32RGBA888.L4: movl _ConvertX86p16_32RGBA888_LUT_X86(,%ecx,8),%edx
-        movb 2(%esi,%ebp,4),%cl
-
-        movl _ConvertX86p16_32RGBA888_LUT_X86+4(,%ebx,8),%eax
-        movb 3(%esi,%ebp,4),%bl
-
-        addl %edx,%eax
-        movl _ConvertX86p16_32RGBA888_LUT_X86(,%ecx,8),%edx
-
-        movl %eax,(%edi,%ebp,8)
-        movl _ConvertX86p16_32RGBA888_LUT_X86+4(,%ebx,8),%eax
-
-        addl %edx,%eax
-        movb 4(%esi,%ebp,4),%cl
-
-        movl %eax,4(%edi,%ebp,8)
-        movb 5(%esi,%ebp,4),%bl
-
-        incl %ebp
-        jnz _ConvertX86p16_32RGBA888.L4
-
-    # tail
-    popl %ecx
-    andl $1,%ecx
-    jz _ConvertX86p16_32RGBA888.L6
-    xorl %ebx,%ebx
-    movb (%esi),%bl                                      # ebx = lower byte pixel 1
-    movl _ConvertX86p16_32RGBA888_LUT_X86(,%ebx,8),%eax  # eax = RGBA8888 of lower byte pixel 1
-    movb 1(%esi),%bl                                     # ebx = upper byte pixel 1
-    movl _ConvertX86p16_32RGBA888_LUT_X86+4(,%ebx,8),%edx# edx = RGBA8888 of upper byte pixel 1
-    addl %edx,%eax
-    movl %eax,(%edi)
-    addl $2,%esi
-    addl $4,%edi
-
-_ConvertX86p16_32RGBA888.L6: popl %ebp
-    jmp _x86return
-
-
-
-
-
-
-_ConvertX86p16_32BGRA888:
-
-    # check short
-    cmpl $32,%ecx
-    ja _ConvertX86p16_32BGRA888.L3
-
-    # short loop
-    xorl %ebx,%ebx
-_ConvertX86p16_32BGRA888.L1: movb (%esi),%bl             # ebx = lower byte pixel 1
-    movl _ConvertX86p16_32BGRA888_LUT_X86(,%ebx,8),%eax  # eax = BGRA8888 of lower byte pixel 1
-    movb 1(%esi),%bl                                     # ebx = upper byte pixel 1
-    movl _ConvertX86p16_32BGRA888_LUT_X86+4(,%ebx,8),%edx# edx = BGRA8888 of upper byte pixel 1
-    addl %edx,%eax
-    movl %eax,(%edi)
-    addl $2,%esi
-    addl $4,%edi
-    decl %ecx
-    jnz _ConvertX86p16_32BGRA888.L1
-_ConvertX86p16_32BGRA888.L2:
-    jmp _x86return
-
-_ConvertX86p16_32BGRA888.L3:  # save ebp
-    pushl %ebp
-
-    # save count
-    pushl %ecx
-
-    # unroll twice
-    movl %ecx,%ebp
-    shrl %ebp
-
-    # point arrays to end
-    leal (%esi,%ebp,4),%esi
-    leal (%edi,%ebp,8),%edi
-
-    # negative counter
-    negl %ebp
-
-    # clear
-    xorl %ebx,%ebx
-    xorl %ecx,%ecx
-
-    # prestep
-    movb (%esi,%ebp,4),%cl
-    movb 1(%esi,%ebp,4),%bl
-
-_ConvertX86p16_32BGRA888.L4: movl _ConvertX86p16_32BGRA888_LUT_X86(,%ecx,8),%edx
-        movb 2(%esi,%ebp,4),%cl
-
-        movl _ConvertX86p16_32BGRA888_LUT_X86+4(,%ebx,8),%eax
-        movb 3(%esi,%ebp,4),%bl
-
-        addl %edx,%eax
-        movl _ConvertX86p16_32BGRA888_LUT_X86(,%ecx,8),%edx
-
-        movl %eax,(%edi,%ebp,8)
-        movl _ConvertX86p16_32BGRA888_LUT_X86+4(,%ebx,8),%eax
-
-        addl %edx,%eax
-        movb 4(%esi,%ebp,4),%cl
-
-        movl %eax,4(%edi,%ebp,8)
-        movb 5(%esi,%ebp,4),%bl
-
-        incl %ebp
-        jnz _ConvertX86p16_32BGRA888.L4
-
-    # tail
-    popl %ecx
-    andl $1,%ecx
-    jz _ConvertX86p16_32BGRA888.L6
-    xorl %ebx,%ebx
-    movb (%esi),%bl                                      # ebx = lower byte pixel 1
-    movl _ConvertX86p16_32BGRA888_LUT_X86(,%ebx,8),%eax  # eax = BGRA8888 of lower byte pixel 1
-    movb 1(%esi),%bl                                     # ebx = upper byte pixel 1
-    movl _ConvertX86p16_32BGRA888_LUT_X86+4(,%ebx,8),%edx# edx = BGRA8888 of upper byte pixel 1
-    addl %edx,%eax
-    movl %eax,(%edi)
-    addl $2,%esi
-    addl $4,%edi
-
-_ConvertX86p16_32BGRA888.L6: popl %ebp
-    jmp _x86return
-
-
-
-
-
-
-_ConvertX86p16_24RGB888:
-
-    # check short
-    cmpl $32,%ecx
-    ja _ConvertX86p16_24RGB888.L3
-
-
-    # short loop
-    xorl %edx,%edx
-_ConvertX86p16_24RGB888.L1: movb (%esi),%dl
-    movl _ConvertX86p16_32RGB888_LUT_X86(,%edx,8),%eax    # eax = ARGB8888 of lower byte
-    movb 1(%esi),%dl
-    movl _ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ebx  # ebx = ARGB8888 of upper byte
-    addl %ebx,%eax                                        # eax = ARGB8888 pixel
-    movb %al,(%edi)
-    movb %ah,1(%edi)
-    shrl $16,%eax
-    movb %al,2(%edi)
-    addl $2,%esi
-    addl $3,%edi
-    decl %ecx
-    jnz _ConvertX86p16_24RGB888.L1
-_ConvertX86p16_24RGB888.L2: jmp _x86return
-
-
-_ConvertX86p16_24RGB888.L3:  # clear edx
-    xorl %edx,%edx
-
-_ConvertX86p16_24RGB888.L4:  # head
-    movl %edi,%eax
-    andl $0b11,%eax
-    jz _ConvertX86p16_24RGB888.L5
-    movb (%esi),%dl
-    movl _ConvertX86p16_32RGB888_LUT_X86(,%edx,8),%eax    # eax = ARGB8888 of lower byte
-    movb 1(%esi),%dl
-    movl _ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ebx  # ebx = ARGB8888 of upper byte
-    addl %ebx,%eax                                        # eax = ARGB8888 pixel
-    movb %al,(%edi)
-    movb %ah,1(%edi)
-    shrl $16,%eax
-    movb %al,2(%edi)
-    addl $2,%esi
-    addl $3,%edi
-    decl %ecx
-    jmp _ConvertX86p16_24RGB888.L4
-
-_ConvertX86p16_24RGB888.L5:  # unroll 4 times
-    pushl %ebp
-    movl %ecx,%ebp
-    shrl $2,%ebp
-
-    # clear ebx
-    xorl %ebx,%ebx
-
-    # save count
-    pushl %ecx
-
-    # prestep
-    movb (%esi),%bl                                     # ebx = lower byte pixel 1
-    movb 1(%esi),%dl                                    # edx = upper byte pixel 1
-
-_ConvertX86p16_24RGB888.L6: movl _ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%eax # eax = ARGB8888 of lower byte pixel 1
-        movb 2(%esi),%bl                                    # ebx = lower byte pixel 2
-
-        movl _ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ecx    # ecx = ARGB8888 of upper byte pixel 1
-        movb 3(%esi),%dl                                    # edx = upper byte pixel 2
-
-        pushl %ebp                                          # save ebp
-        addl %ecx,%eax                                      # eax = ARGB8888 of pixel 1
-
-        movl _ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%ebp      # ebp = ARGB8888 of lower byte pixel 2
-        movl _ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ecx    # ecx = ARGB8888 of upper byte pixel 2
-
-        movb 4(%esi),%bl                                    # ebx = lower byte pixel 3
-        addl %ebp,%ecx                                      # ecx = ARGB8888 of pixel 2
-
-        shll $24,%ebp                                       # ebp = [b][0][0][0] of pixel 2
-        movb 5(%esi),%dl                                    # edx = upper byte pixel 3
-
-        shrl $8,%ecx                                        # ecx = [0][0][r][g] pixel 2
-        addl %ebp,%eax                                      # eax = [b2][r1][g1][b1] (done)
-
-        movl %eax,(%edi)                                    # store dword 1
-        movl _ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%eax    # eax = ARGB8888 of upper byte pixel 3
-
-        movl _ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%ebp      # ebp = ARGB8888 of lower byte pixel 3
-        movb 6(%esi),%bl                                    # ebx = lower byte pixel 4
-
-        addl %eax,%ebp                                      # ebp = ARGB8888 of pixel 3
-        movb 7(%esi),%dl                                    # edx = upper byte pixel 4
-
-        shll $16,%ebp                                       # ebp = [g][b][0][0] pixel 3
-
-        shrl $16,%eax                                       #  al = red component of pixel 3
-        addl %ecx,%ebp                                      # ebp = [g3][b3][r2][g2] (done)
-
-        movl %ebp,4(%edi)                                   # store dword 2
-        movl _ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%ecx      # ebx = ARGB8888 of lower byte pixel 4
-
-        movl _ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ebp    # ebp = ARGB8888 of upper byte pixel 4
-        movb 4*2+0(%esi),%bl                                # ebx = lower byte pixel 1
-
-        addl %ebp,%ecx                                      # ecx = ARGB8888 of pixel 4
-        movb 4*2+1(%esi),%dl                                # edx = upper byte pixel 1
-
-        shll $8,%ecx                                        # ecx = [r][g][b][0]
-        popl %ebp                                           # restore ebp
-
-        movb %al,%cl                                        # ecx = [r4][g4][b4][r3] (done)
-        addl $4*2,%esi
-
-        movl %ecx,8(%edi)                                   # store dword 3
-        addl $3*4,%edi
-
-        decl %ebp
-        jz _ConvertX86p16_24RGB888.L7
-
-        jmp _ConvertX86p16_24RGB888.L6
-
-_ConvertX86p16_24RGB888.L7:  # check tail
-    popl %ecx
-    andl $0b11,%ecx
-    jz _ConvertX86p16_24RGB888.L9
-
-_ConvertX86p16_24RGB888.L8:  # tail
-    movb (%esi),%dl
-    movl _ConvertX86p16_32RGB888_LUT_X86(,%edx,8),%eax    # eax = ARGB8888 of lower byte
-    movb 1(%esi),%dl
-    movl _ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ebx  # ebx = ARGB8888 of upper byte
-    addl %ebx,%eax                                    # eax = ARGB8888 pixel
-    movb %al,(%edi)
-    movb %ah,1(%edi)
-    shrl $16,%eax
-    movb %al,2(%edi)
-    addl $2,%esi
-    addl $3,%edi
-    decl %ecx
-    jnz _ConvertX86p16_24RGB888.L8
-
-_ConvertX86p16_24RGB888.L9: popl %ebp
-    jmp _x86return
-
-
-
-
-
-_ConvertX86p16_24BGR888:
-
-    # check short
-    cmpl $32,%ecx
-    ja _ConvertX86p16_24BGR888.L3
-
-
-    # short loop
-    xorl %edx,%edx
-_ConvertX86p16_24BGR888.L1: movb (%esi),%dl
-    movl _ConvertX86p16_32BGR888_LUT_X86(,%edx,8),%eax    # eax = ABGR8888 of lower byte
-    movb 1(%esi),%dl
-    movl _ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebx  # ebx = ABGR8888 of upper byte
-    addl %ebx,%eax                                    # eax = ABGR8888 pixel
-    movb %al,(%edi)
-    movb %ah,1(%edi)
-    shrl $16,%eax
-    movb %al,2(%edi)
-    addl $2,%esi
-    addl $3,%edi
-    decl %ecx
-    jnz _ConvertX86p16_24BGR888.L1
-_ConvertX86p16_24BGR888.L2:
-    jmp _x86return
-
-
-_ConvertX86p16_24BGR888.L3:  # clear edx
-    xorl %edx,%edx
-
-_ConvertX86p16_24BGR888.L4:  # head
-    movl %edi,%eax
-    andl $0b11,%eax
-    jz _ConvertX86p16_24BGR888.L5
-    movb (%esi),%dl
-    movl _ConvertX86p16_32BGR888_LUT_X86(,%edx,8),%eax    # eax = ABGR8888 of lower byte
-    movb 1(%esi),%dl
-    movl _ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebx  # ebx = ABGR8888 of upper byte
-    addl %ebx,%eax                                    # eax = ABGR8888 pixel
-    movb %al,(%edi)
-    movb %ah,1(%edi)
-    shrl $16,%eax
-    movb %al,2(%edi)
-    addl $2,%esi
-    addl $3,%edi
-    decl %ecx
-    jmp _ConvertX86p16_24BGR888.L4
-
-_ConvertX86p16_24BGR888.L5:  # unroll 4 times
-    pushl %ebp
-    movl %ecx,%ebp
-    shrl $2,%ebp
-
-    # clear ebx
-    xorl %ebx,%ebx
-
-    # save count
-    pushl %ecx
-
-    # prestep
-    movb (%esi),%bl                                     # ebx = lower byte pixel 1
-    movb 1(%esi),%dl                                    # edx = upper byte pixel 1
-
-_ConvertX86p16_24BGR888.L6: movl _ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%eax # eax = ABGR8888 of lower byte pixel 1
-        movb 2(%esi),%bl                                    # ebx = lower byte pixel 2
-
-        movl _ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ecx    # ecx = ABGR8888 of upper byte pixel 1
-        movb 3(%esi),%dl                                    # edx = upper byte pixel 2
-
-        pushl %ebp                                          # save ebp
-        addl %ecx,%eax                                      # eax = ABGR8888 of pixel 1
-
-        movl _ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%ecx      # ecx = ABGR8888 of lower byte pixel 2
-        movl _ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebp    # ebp = ABGR8888 of upper byte pixel 2
-
-        movb 4(%esi),%bl                                    # ebx = lower byte pixel 3
-        addl %ebp,%ecx                                      # ecx = ABGR8888 of pixel 2
-
-        shll $24,%ebp                                       # ebp = [r][0][0][0] of pixel 2
-        movb 5(%esi),%dl                                    # edx = upper byte pixel 3
-
-        shrl $8,%ecx                                        # ecx = [0][0][b][g] pixel 2
-        addl %ebp,%eax                                      # eax = [r2][b1][g1][r1] (done)
-
-        movl %eax,(%edi)                                    # store dword 1
-        movl _ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebp    # ebp = ABGR8888 of upper byte pixel 3
-
-        movl _ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%eax      # eax = ABGR8888 of lower byte pixel 3
-        movb 6(%esi),%bl                                    # ebx = lower byte pixel 4
-
-        addl %eax,%ebp                                      # ebp = ABGR8888 of pixel 3
-        movb 7(%esi),%dl                                    # edx = upper byte pixel 4
-
-        shll $16,%ebp                                       # ebp = [g][r][0][0] pixel 3
-
-        shrl $16,%eax                                       #  al = blue component of pixel 3
-        addl %ecx,%ebp                                      # ebp = [g3][r3][b2][g2] (done)
-
-        movl %ebp,4(%edi)                                   # store dword 2
-        movl _ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%ecx      # ecx = ABGR8888 of lower byte pixel 4
-
-        movl _ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebp    # ebp = ABGR8888 of upper byte pixel 4
-        movb 4*2+0(%esi),%bl                                # ebx = lower byte pixel 1 of next group
-
-        addl %ebp,%ecx                                      # ecx = ABGR8888 of pixel 4
-        movb 4*2+1(%esi),%dl                                # edx = upper byte pixel 1 of next group
-
-        shll $8,%ecx                                        # ecx = [b][g][r][0]
-        popl %ebp                                           # restore ebp
-
-        movb %al,%cl                                        # ecx = [b4][g4][r4][b3] (done)
-        addl $4*2,%esi
-
-        movl %ecx,8(%edi)                                   # store dword 3
-        addl $3*4,%edi
-
-        decl %ebp
-        jz _ConvertX86p16_24BGR888.L7
-
-        jmp _ConvertX86p16_24BGR888.L6
-
-_ConvertX86p16_24BGR888.L7:  # check tail
-    popl %ecx
-    andl $0b11,%ecx
-    jz _ConvertX86p16_24BGR888.L9
-
-_ConvertX86p16_24BGR888.L8:  # tail
-    movb (%esi),%dl
-    movl _ConvertX86p16_32BGR888_LUT_X86(,%edx,8),%eax    # eax = ABGR8888 of lower byte
-    movb 1(%esi),%dl
-    movl _ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebx  # ebx = ABGR8888 of upper byte
-    addl %ebx,%eax                                    # eax = ABGR8888 pixel
-    movb %al,(%edi)
-    movb %ah,1(%edi)
-    shrl $16,%eax
-    movb %al,2(%edi)
-    addl $2,%esi
-    addl $3,%edi
-    decl %ecx
-    jnz _ConvertX86p16_24BGR888.L8
-
-_ConvertX86p16_24BGR888.L9: popl %ebp
-    jmp _x86return
-
-
-
-
-_ConvertX86p16_16BGR565:
-
-    # check short
-    cmpl $16,%ecx
-    ja _ConvertX86p16_16BGR565.L3
-
-
-_ConvertX86p16_16BGR565.L1:  # short loop
-    movb (%esi),%al
-    movb 1(%esi),%ah
-    movl %eax,%ebx
-    movl %eax,%edx
-    shrl $11,%eax
-    andl $0b11111,%eax
-    andl $0b11111100000,%ebx
-    shll $11,%edx
-    addl %ebx,%eax
-    addl %edx,%eax
-    movb %al,(%edi)
-    movb %ah,1(%edi)
-    addl $2,%esi
-    addl $2,%edi
-    decl %ecx
-    jnz _ConvertX86p16_16BGR565.L1
-_ConvertX86p16_16BGR565.L2:
-    jmp _x86return
-
-_ConvertX86p16_16BGR565.L3:  # head
-    movl %edi,%eax
-    andl $0b11,%eax
-    jz _ConvertX86p16_16BGR565.L4
-    movb (%esi),%al
-    movb 1(%esi),%ah
-    movl %eax,%ebx
-    movl %eax,%edx
-    shrl $11,%eax
-    andl $0b11111,%eax
-    andl $0b11111100000,%ebx
-    shll $11,%edx
-    addl %ebx,%eax
-    addl %edx,%eax
-    movb %al,(%edi)
-    movb %ah,1(%edi)
-    addl $2,%esi
-    addl $2,%edi
-    decl %ecx
-
-_ConvertX86p16_16BGR565.L4:  # save count
-    pushl %ecx
-
-    # unroll twice
-    shrl %ecx
-
-    # point arrays to end
-    leal (%esi,%ecx,4),%esi
-    leal (%edi,%ecx,4),%edi
-
-    # negative counter
-    negl %ecx
-    jmp _ConvertX86p16_16BGR565.L6
-
-_ConvertX86p16_16BGR565.L5: movl %eax,-4(%edi,%ecx,4)
-_ConvertX86p16_16BGR565.L6: movl (%esi,%ecx,4),%eax
-
-        movl (%esi,%ecx,4),%ebx
-        andl $0x07E007E0,%eax
-
-        movl (%esi,%ecx,4),%edx
-        andl $0x0F800F800,%ebx
-
-        shrl $11,%ebx
-        andl $0x001F001F,%edx
-
-        shll $11,%edx
-        addl %ebx,%eax
-
-        addl %edx,%eax
-        incl %ecx
-
-        jnz _ConvertX86p16_16BGR565.L5
-
-    movl %eax,-4(%edi,%ecx,4)
-
-    # tail
-    popl %ecx
-    andl $1,%ecx
-    jz _ConvertX86p16_16BGR565.L7
-    movb (%esi),%al
-    movb 1(%esi),%ah
-    movl %eax,%ebx
-    movl %eax,%edx
-    shrl $11,%eax
-    andl $0b11111,%eax
-    andl $0b11111100000,%ebx
-    shll $11,%edx
-    addl %ebx,%eax
-    addl %edx,%eax
-    movb %al,(%edi)
-    movb %ah,1(%edi)
-    addl $2,%esi
-    addl $2,%edi
-
-_ConvertX86p16_16BGR565.L7:
-    jmp _x86return
-
-
-
-
-
-
-_ConvertX86p16_16RGB555:
-
-    # check short
-    cmpl $32,%ecx
-    ja _ConvertX86p16_16RGB555.L3
-
-
-_ConvertX86p16_16RGB555.L1:  # short loop
-    movb (%esi),%al
-    movb 1(%esi),%ah
-    movl %eax,%ebx
-    shrl %ebx
-    andl $0b0111111111100000,%ebx
-    andl $0b0000000000011111,%eax
-    addl %ebx,%eax
-    movb %al,(%edi)
-    movb %ah,1(%edi)
-    addl $2,%esi
-    addl $2,%edi
-    decl %ecx
-    jnz _ConvertX86p16_16RGB555.L1
-_ConvertX86p16_16RGB555.L2:
-    jmp _x86return
-
-_ConvertX86p16_16RGB555.L3:  # head
-    movl %edi,%eax
-    andl $0b11,%eax
-    jz _ConvertX86p16_16RGB555.L4
-    movb (%esi),%al
-    movb 1(%esi),%ah
-    movl %eax,%ebx
-    shrl %ebx
-    andl $0b0111111111100000,%ebx
-    andl $0b0000000000011111,%eax
-    addl %ebx,%eax
-    movb %al,(%edi)
-    movb %ah,1(%edi)
-    addl $2,%esi
-    addl $2,%edi
-    decl %ecx
-
-_ConvertX86p16_16RGB555.L4:  # save ebp
-    pushl %ebp
-
-    # save count
-    pushl %ecx
-
-    # unroll four times
-    shrl $2,%ecx
-
-    # point arrays to end
-    leal (%esi,%ecx,8),%esi
-    leal (%edi,%ecx,8),%edi
-
-    # negative counter
-    xorl %ebp,%ebp
-    subl %ecx,%ebp
-
-_ConvertX86p16_16RGB555.L5: movl (%esi,%ebp,8),%eax # agi?
-        movl 4(%esi,%ebp,8),%ecx
-
-        movl %eax,%ebx
-        movl %ecx,%edx
-
-        andl $0x0FFC0FFC0,%eax
-        andl $0x0FFC0FFC0,%ecx
-
-        shrl %eax
-        andl $0x001F001F,%ebx
-
-        shrl %ecx
-        andl $0x001F001F,%edx
-
-        addl %ebx,%eax
-        addl %edx,%ecx
-
-        movl %eax,(%edi,%ebp,8)
-        movl %ecx,4(%edi,%ebp,8)
-
-        incl %ebp
-        jnz _ConvertX86p16_16RGB555.L5
-
-    # tail
-    popl %ecx
-_ConvertX86p16_16RGB555.L6: andl $0b11,%ecx
-    jz _ConvertX86p16_16RGB555.L7
-    movb (%esi),%al
-    movb 1(%esi),%ah
-    movl %eax,%ebx
-    shrl %ebx
-    andl $0b0111111111100000,%ebx
-    andl $0b0000000000011111,%eax
-    addl %ebx,%eax
-    movb %al,(%edi)
-    movb %ah,1(%edi)
-    addl $2,%esi
-    addl $2,%edi
-    decl %ecx
-    jmp _ConvertX86p16_16RGB555.L6
-
-_ConvertX86p16_16RGB555.L7: popl %ebp
-    jmp _x86return
-
-
-
-
-
-
-_ConvertX86p16_16BGR555:
-
-    # check short
-    cmpl $16,%ecx
-    ja _ConvertX86p16_16BGR555.L3
-
-
-_ConvertX86p16_16BGR555.L1:  # short loop
-    movb (%esi),%al
-    movb 1(%esi),%ah
-    movl %eax,%ebx
-    movl %eax,%edx
-    shrl $11,%eax
-    andl $0b11111,%eax
-    shrl %ebx
-    andl $0b1111100000,%ebx
-    shll $10,%edx
-    andl $0b0111110000000000,%edx
-    addl %ebx,%eax
-    addl %edx,%eax
-    movb %al,(%edi)
-    movb %ah,1(%edi)
-    addl $2,%esi
-    addl $2,%edi
-    decl %ecx
-    jnz _ConvertX86p16_16BGR555.L1
-_ConvertX86p16_16BGR555.L2:
-    jmp _x86return
-
-_ConvertX86p16_16BGR555.L3:  # head
-    movl %edi,%eax
-    andl $0b11,%eax
-    jz _ConvertX86p16_16BGR555.L4
-    movb (%esi),%al
-    movb 1(%esi),%ah
-    movl %eax,%ebx
-    movl %eax,%edx
-    shrl $11,%eax
-    andl $0b11111,%eax
-    shrl %ebx
-    andl $0b1111100000,%ebx
-    shll $10,%edx
-    andl $0b0111110000000000,%edx
-    addl %ebx,%eax
-    addl %edx,%eax
-    movb %al,(%edi)
-    movb %ah,1(%edi)
-    addl $2,%esi
-    addl $2,%edi
-    decl %ecx
-
-_ConvertX86p16_16BGR555.L4:  # save count
-    pushl %ecx
-
-    # unroll twice
-    shrl %ecx
-
-    # point arrays to end
-    leal (%esi,%ecx,4),%esi
-    leal (%edi,%ecx,4),%edi
-
-    # negative counter
-    negl %ecx
-    jmp _ConvertX86p16_16BGR555.L6
-
-_ConvertX86p16_16BGR555.L5: movl %eax,-4(%edi,%ecx,4)
-_ConvertX86p16_16BGR555.L6: movl (%esi,%ecx,4),%eax
-
-        shrl %eax
-        movl (%esi,%ecx,4),%ebx
-
-        andl $0x03E003E0,%eax
-        movl (%esi,%ecx,4),%edx
-
-        andl $0x0F800F800,%ebx
-
-        shrl $11,%ebx
-        andl $0x001F001F,%edx
-
-        shll $10,%edx
-        addl %ebx,%eax
-
-        addl %edx,%eax
-        incl %ecx
-
-        jnz _ConvertX86p16_16BGR555.L5
-
-    movl %eax,-4(%edi,%ecx,4)
-
-    # tail
-    popl %ecx
-    andl $1,%ecx
-    jz _ConvertX86p16_16BGR555.L7
-    movb (%esi),%al
-    movb 1(%esi),%ah
-    movl %eax,%ebx
-    movl %eax,%edx
-    shrl $11,%eax
-    andl $0b11111,%eax
-    shrl %ebx
-    andl $0b1111100000,%ebx
-    shll $10,%edx
-    andl $0b0111110000000000,%edx
-    addl %ebx,%eax
-    addl %edx,%eax
-    movb %al,(%edi)
-    movb %ah,1(%edi)
-    addl $2,%esi
-    addl $2,%edi
-
-_ConvertX86p16_16BGR555.L7:
-    jmp _x86return
-
-
-
-
-
-
-_ConvertX86p16_8RGB332:
-
-    # check short
-    cmpl $16,%ecx
-    ja _ConvertX86p16_8RGB332.L3
-
-
-_ConvertX86p16_8RGB332.L1:  # short loop
-    movb (%esi),%al
-    movb 1(%esi),%ah
-    movl %eax,%ebx
-    movl %eax,%edx
-    andl $0b11000,%eax          # blue
-    shrl $3,%eax
-    andl $0b11100000000,%ebx    # green
-    shrl $6,%ebx
-    andl $0b1110000000000000,%edx # red
-    shrl $8,%edx
-    addl %ebx,%eax
-    addl %edx,%eax
-    movb %al,(%edi)
-    addl $2,%esi
-    incl %edi
-    decl %ecx
-    jnz _ConvertX86p16_8RGB332.L1
-_ConvertX86p16_8RGB332.L2:
-    jmp _x86return
-
-_ConvertX86p16_8RGB332.L3: movl %edi,%eax
-    andl $0b11,%eax
-    jz _ConvertX86p16_8RGB332.L4
-    movb (%esi),%al
-    movb 1(%esi),%ah
-    movl %eax,%ebx
-    movl %eax,%edx
-    andl $0b11000,%eax          # blue
-    shrl $3,%eax
-    andl $0b11100000000,%ebx    # green
-    shrl $6,%ebx
-    andl $0b1110000000000000,%edx # red
-    shrl $8,%edx
-    addl %ebx,%eax
-    addl %edx,%eax
-    movb %al,(%edi)
-    addl $2,%esi
-    incl %edi
-    decl %ecx
-    jmp _ConvertX86p16_8RGB332.L3
-
-_ConvertX86p16_8RGB332.L4:  # save ebp
-    pushl %ebp
-
-    # save count
-    pushl %ecx
-
-    # unroll 4 times
-    shrl $2,%ecx
-
-    # prestep
-    movb (%esi),%dl
-    movb 1(%esi),%bl
-    movb 2(%esi),%dh
-
-_ConvertX86p16_8RGB332.L5: shll $16,%edx
-        movb 3(%esi),%bh
-
-        shll $16,%ebx
-        movb 4(%esi),%dl
-
-        movb 6(%esi),%dh
-        movb 5(%esi),%bl
-
-        andl $0b00011000000110000001100000011000,%edx
-        movb 7(%esi),%bh
-
-        rorl $16+3,%edx
-        movl %ebx,%eax                                  # setup eax for reds
-
-        andl $0b00000111000001110000011100000111,%ebx
-        andl $0b11100000111000001110000011100000,%eax   # reds
-
-        rorl $16-2,%ebx
-        addl $8,%esi
-
-        rorl $16,%eax
-        addl $4,%edi
-
-        addl %ebx,%eax
-        movb 1(%esi),%bl                                # greens
-
-        addl %edx,%eax
-        movb (%esi),%dl                                 # blues
-
-        movl %eax,-4(%edi)
-        movb 2(%esi),%dh
-
-        decl %ecx
-        jnz _ConvertX86p16_8RGB332.L5
-
-    # check tail
-    popl %ecx
-    andl $0b11,%ecx
-    jz _ConvertX86p16_8RGB332.L7
-
-_ConvertX86p16_8RGB332.L6:  # tail
-    movb (%esi),%al
-    movb 1(%esi),%ah
-    movl %eax,%ebx
-    movl %eax,%edx
-    andl $0b11000,%eax          # blue
-    shrl $3,%eax
-    andl $0b11100000000,%ebx    # green
-    shrl $6,%ebx
-    andl $0b1110000000000000,%edx # red
-    shrl $8,%edx
-    addl %ebx,%eax
-    addl %edx,%eax
-    movb %al,(%edi)
-    addl $2,%esi
-    incl %edi
-    decl %ecx
-    jnz _ConvertX86p16_8RGB332.L6
-
-_ConvertX86p16_8RGB332.L7: popl %ebp
-    jmp _x86return
-

+ 1143 - 0
packages/hermes/src/i386/x86p_16.inc

@@ -0,0 +1,1143 @@
+{
+    x86 format converters for HERMES
+    Copyright (c) 1998 Glenn Fiedler ([email protected])
+    Routines adjusted for Hermes by Christian Nentwich ([email protected])
+    Used with permission.
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version
+    with the following modification:
+
+    As a special exception, the copyright holders of this library give you
+    permission to link this library with independent modules to produce an
+    executable, regardless of the license terms of these independent modules, and
+    to copy and distribute the resulting executable under terms of your choice,
+    provided that you also meet, for each linked independent module, the terms
+    and conditions of the license of that module. An independent module is a
+    module which is not derived from or based on this library. If you modify
+    this library, you may extend this exception to your version of the library,
+    but you are not obligated to do so. If you do not wish to do so, delete this
+    exception statement from your version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+}
+
+
+procedure ConvertX86p16_32RGB888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+    // check short
+    cmpl $32,%ecx
+    ja .L3
+
+
+    // short loop
+    xorl %ebx,%ebx
+.L1: movb (%esi),%bl              // ebx = lower byte pixel 1
+    movl ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%eax   // eax = ARGB8888 of lower byte pixel 1
+    movb 1(%esi),%bl                                     // ebx = upper byte pixel 1
+    movl ConvertX86p16_32RGB888_LUT_X86+4(,%ebx,8),%edx // edx = ARGB8888 of upper byte pixel 1
+    addl %edx,%eax
+    movl %eax,(%edi)
+    addl $2,%esi
+    addl $4,%edi
+    decl %ecx
+    jnz .L1
+.L2:
+    jmp _X86RETURN
+
+
+.L3:  // save ebp
+    pushl %ebp
+
+    // save count
+    pushl %ecx
+
+    // unroll twice
+    movl %ecx,%ebp
+    shrl $1,%ebp
+
+    // point arrays to end
+    leal (%esi,%ebp,4),%esi
+    leal (%edi,%ebp,8),%edi
+
+    // negative counter
+    negl %ebp
+
+    // clear
+    xorl %ebx,%ebx
+    xorl %ecx,%ecx
+
+    // prestep
+    movb (%esi,%ebp,4),%cl
+    movb 1(%esi,%ebp,4),%bl
+
+.L4:    movl ConvertX86p16_32RGB888_LUT_X86(,%ecx,8),%edx
+        movb 2(%esi,%ebp,4),%cl
+
+        movl ConvertX86p16_32RGB888_LUT_X86+4(,%ebx,8),%eax
+        movb 3(%esi,%ebp,4),%bl
+
+        addl %edx,%eax
+        movl ConvertX86p16_32RGB888_LUT_X86(,%ecx,8),%edx
+
+        movl %eax,(%edi,%ebp,8)
+        movl ConvertX86p16_32RGB888_LUT_X86+4(,%ebx,8),%eax
+
+        addl %edx,%eax
+        movb 4(%esi,%ebp,4),%cl
+
+        movl %eax,4(%edi,%ebp,8)
+        movb 5(%esi,%ebp,4),%bl
+
+        incl %ebp
+        jnz .L4
+
+    // tail
+    popl %ecx
+    andl $1,%ecx
+    jz .L6
+    xorl %ebx,%ebx
+    movb (%esi),%bl                                      // ebx = lower byte pixel 1
+    movl ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%eax   // eax = ARGB8888 of lower byte pixel 1
+    movb 1(%esi),%bl                                     // ebx = upper byte pixel 1
+    movl ConvertX86p16_32RGB888_LUT_X86+4(,%ebx,8),%edx // edx = ARGB8888 of upper byte pixel 1
+    addl %edx,%eax
+    movl %eax,(%edi)
+    addl $2,%esi
+    addl $4,%edi
+
+.L6: popl %ebp
+    jmp _X86RETURN
+end;
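
The 16-to-32 bit converters above and the three variants that follow all rely on the same table trick: the 16-bit source pixel is split into its low and high bytes, each byte indexes a 256-entry table of paired 32-bit values, and a single add combines the two halves into the expanded pixel with no per-pixel shifting or masking. The tables themselves (ConvertX86p16_32RGB888_LUT_X86 and friends) are built elsewhere in the package; the plain-Pascal sketch below only illustrates the idea, so the record layout and the plain left-shift expansion (no replication of top bits into low bits, alpha left at zero) are assumptions.

type
  TLUTPair = record
    Lo, Hi: LongWord;             { contribution of the low/high source byte }
  end;

var
  LUT: array[0..255] of TLUTPair;

procedure BuildRGB565To888LUT;
var
  b: Integer;
begin
  for b := 0 to 255 do
  begin
    { low source byte: blue in bits 0-4, low 3 green bits in bits 5-7 }
    LUT[b].Lo := ((b and $1F) shl 3)               { blue  -> bits 3..7 }
              or (((b shr 5) and $07) shl 10);     { green -> bits 10..12 }
    { high source byte: top 3 green bits in bits 0-2, red in bits 3-7 }
    LUT[b].Hi := ((b and $07) shl 13)              { green -> bits 13..15 }
              or (((b shr 3) and $1F) shl 19);     { red   -> bits 19..23 }
  end;
end;

function Convert565To888(p: Word): LongWord;
begin
  { the same two loads and one add the unrolled loop performs per pixel;
    alpha stays zero in this sketch, the real tables may differ }
  Result := LUT[p and $FF].Lo + LUT[p shr 8].Hi;
end;
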
+
+
+
+
+
+procedure ConvertX86p16_32BGR888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+    // check short
+    cmpl $32,%ecx
+    ja .L3
+
+
+    // short loop
+    xorl %ebx,%ebx
+.L1: movb (%esi),%bl                                     // ebx = lower byte pixel 1
+    movl ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%eax    // eax = ABGR8888 of lower byte pixel 1
+    movb 1(%esi),%bl                                     // ebx = upper byte pixel 1
+    movl ConvertX86p16_32BGR888_LUT_X86+4(,%ebx,8),%edx  // edx = ABGR8888 of upper byte pixel 1
+    addl %edx,%eax
+    movl %eax,(%edi)
+    addl $2,%esi
+    addl $4,%edi
+    decl %ecx
+    jnz .L1
+.L2:
+    jmp _X86RETURN
+
+.L3:  // save ebp
+    pushl %ebp
+
+    // save count
+    pushl %ecx
+
+    // unroll twice
+    movl %ecx,%ebp
+    shrl $1,%ebp
+
+    // point arrays to end
+    leal (%esi,%ebp,4),%esi
+    leal (%edi,%ebp,8),%edi
+
+    // negative counter
+    negl %ebp
+
+    // clear
+    xorl %ebx,%ebx
+    xorl %ecx,%ecx
+
+    // prestep
+    movb (%esi,%ebp,4),%cl
+    movb 1(%esi,%ebp,4),%bl
+
+.L4:    movl ConvertX86p16_32BGR888_LUT_X86(,%ecx,8),%edx
+        movb 2(%esi,%ebp,4),%cl
+
+        movl ConvertX86p16_32BGR888_LUT_X86+4(,%ebx,8),%eax
+        movb 3(%esi,%ebp,4),%bl
+
+        addl %edx,%eax
+        movl ConvertX86p16_32BGR888_LUT_X86(,%ecx,8),%edx
+
+        movl %eax,(%edi,%ebp,8)
+        movl ConvertX86p16_32BGR888_LUT_X86+4(,%ebx,8),%eax
+
+        addl %edx,%eax
+        movb 4(%esi,%ebp,4),%cl
+
+        movl %eax,4(%edi,%ebp,8)
+        movb 5(%esi,%ebp,4),%bl
+
+        incl %ebp
+        jnz .L4
+
+    // tail
+    popl %ecx
+    andl $1,%ecx
+    jz .L6
+    xorl %ebx,%ebx
+    movb (%esi),%bl                                      // ebx = lower byte pixel 1
+    movl ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%eax    // eax = ABGR8888 of lower byte pixel 1
+    movb 1(%esi),%bl                                     // ebx = upper byte pixel 1
+    movl ConvertX86p16_32BGR888_LUT_X86+4(,%ebx,8),%edx  // edx = ABGR8888 of upper byte pixel 1
+    addl %edx,%eax
+    movl %eax,(%edi)
+    addl $2,%esi
+    addl $4,%edi
+
+.L6: popl %ebp
+    jmp _X86RETURN
+end;
+
+
+
+procedure ConvertX86p16_32RGBA888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+    // check short
+    cmpl $32,%ecx
+    ja .L3
+
+
+    // short loop
+    xorl %ebx,%ebx
+.L1: movb (%esi),%bl                                      // ebx = lower byte pixel 1
+    movl ConvertX86p16_32RGBA888_LUT_X86(,%ebx,8),%eax   // eax = RGBA8888 of lower byte pixel 1
+    movb 1(%esi),%bl                                      // ebx = upper byte pixel 1
+    movl ConvertX86p16_32RGBA888_LUT_X86+4(,%ebx,8),%edx // edx = RGBA8888 of upper byte pixel 1
+    addl %edx,%eax
+    movl %eax,(%edi)
+    addl $2,%esi
+    addl $4,%edi
+    decl %ecx
+    jnz .L1
+.L2:
+    jmp _X86RETURN
+
+.L3:  // save ebp
+    pushl %ebp
+
+    // save count
+    pushl %ecx
+
+    // unroll twice
+    movl %ecx,%ebp
+    shrl $1,%ebp
+
+    // point arrays to end
+    leal (%esi,%ebp,4),%esi
+    leal (%edi,%ebp,8),%edi
+
+    // negative counter
+    negl %ebp
+
+    // clear
+    xorl %ebx,%ebx
+    xorl %ecx,%ecx
+
+    // prestep
+    movb (%esi,%ebp,4),%cl
+    movb 1(%esi,%ebp,4),%bl
+
+.L4:    movl ConvertX86p16_32RGBA888_LUT_X86(,%ecx,8),%edx
+        movb 2(%esi,%ebp,4),%cl
+
+        movl ConvertX86p16_32RGBA888_LUT_X86+4(,%ebx,8),%eax
+        movb 3(%esi,%ebp,4),%bl
+
+        addl %edx,%eax
+        movl ConvertX86p16_32RGBA888_LUT_X86(,%ecx,8),%edx
+
+        movl %eax,(%edi,%ebp,8)
+        movl ConvertX86p16_32RGBA888_LUT_X86+4(,%ebx,8),%eax
+
+        addl %edx,%eax
+        movb 4(%esi,%ebp,4),%cl
+
+        movl %eax,4(%edi,%ebp,8)
+        movb 5(%esi,%ebp,4),%bl
+
+        incl %ebp
+        jnz .L4
+
+    // tail
+    popl %ecx
+    andl $1,%ecx
+    jz .L6
+    xorl %ebx,%ebx
+    movb (%esi),%bl                                       // ebx = lower byte pixel 1
+    movl ConvertX86p16_32RGBA888_LUT_X86(,%ebx,8),%eax   // eax = RGBA8888 of lower byte pixel 1
+    movb 1(%esi),%bl                                      // ebx = upper byte pixel 1
+    movl ConvertX86p16_32RGBA888_LUT_X86+4(,%ebx,8),%edx // edx = RGBA8888 of upper byte pixel 1
+    addl %edx,%eax
+    movl %eax,(%edi)
+    addl $2,%esi
+    addl $4,%edi
+
+.L6: popl %ebp
+    jmp _X86RETURN
+end;
+
+
+
+
+procedure ConvertX86p16_32BGRA888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+    // check short
+    cmpl $32,%ecx
+    ja .L3
+
+    // short loop
+    xorl %ebx,%ebx
+.L1: movb (%esi),%bl                                      // ebx = lower byte pixel 1
+    movl ConvertX86p16_32BGRA888_LUT_X86(,%ebx,8),%eax    // eax = BGRA8888 of lower byte pixel 1
+    movb 1(%esi),%bl                                      // ebx = upper byte pixel 1
+    movl ConvertX86p16_32BGRA888_LUT_X86+4(,%ebx,8),%edx  // edx = BGRA8888 of upper byte pixel 1
+    addl %edx,%eax
+    movl %eax,(%edi)
+    addl $2,%esi
+    addl $4,%edi
+    decl %ecx
+    jnz .L1
+.L2:
+    jmp _X86RETURN
+
+.L3:  // save ebp
+    pushl %ebp
+
+    // save count
+    pushl %ecx
+
+    // unroll twice
+    movl %ecx,%ebp
+    shrl $1,%ebp
+
+    // point arrays to end
+    leal (%esi,%ebp,4),%esi
+    leal (%edi,%ebp,8),%edi
+
+    // negative counter
+    negl %ebp
+
+    // clear
+    xorl %ebx,%ebx
+    xorl %ecx,%ecx
+
+    // prestep
+    movb (%esi,%ebp,4),%cl
+    movb 1(%esi,%ebp,4),%bl
+
+.L4:    movl ConvertX86p16_32BGRA888_LUT_X86(,%ecx,8),%edx
+        movb 2(%esi,%ebp,4),%cl
+
+        movl ConvertX86p16_32BGRA888_LUT_X86+4(,%ebx,8),%eax
+        movb 3(%esi,%ebp,4),%bl
+
+        addl %edx,%eax
+        movl ConvertX86p16_32BGRA888_LUT_X86(,%ecx,8),%edx
+
+        movl %eax,(%edi,%ebp,8)
+        movl ConvertX86p16_32BGRA888_LUT_X86+4(,%ebx,8),%eax
+
+        addl %edx,%eax
+        movb 4(%esi,%ebp,4),%cl
+
+        movl %eax,4(%edi,%ebp,8)
+        movb 5(%esi,%ebp,4),%bl
+
+        incl %ebp
+        jnz .L4
+
+    // tail
+    popl %ecx
+    andl $1,%ecx
+    jz .L6
+    xorl %ebx,%ebx
+    movb (%esi),%bl                                       // ebx = lower byte pixel 1
+    movl ConvertX86p16_32BGRA888_LUT_X86(,%ebx,8),%eax   // eax = BGRA8888 of lower byte pixel 1
+    movb 1(%esi),%bl                                      // ebx = upper byte pixel 1
+    movl ConvertX86p16_32BGRA888_LUT_X86+4(,%ebx,8),%edx // edx = BGRA8888 of upper byte pixel 1
+    addl %edx,%eax
+    movl %eax,(%edi)
+    addl $2,%esi
+    addl $4,%edi
+
+.L6: popl %ebp
+    jmp _X86RETURN
+end;
+
+
+
+
+procedure ConvertX86p16_24RGB888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+    // check short
+    cmpl $32,%ecx
+    ja .L3
+
+
+    // short loop
+    xorl %edx,%edx
+.L1: movb (%esi),%dl
+    movl ConvertX86p16_32RGB888_LUT_X86(,%edx,8),%eax     // eax = ARGB8888 of lower byte
+    movb 1(%esi),%dl
+    movl ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ebx   // ebx = ARGB8888 of upper byte
+    addl %ebx,%eax                                        // eax = ARGB8888 pixel
+    movb %al,(%edi)
+    movb %ah,1(%edi)
+    shrl $16,%eax
+    movb %al,2(%edi)
+    addl $2,%esi
+    addl $3,%edi
+    decl %ecx
+    jnz .L1
+.L2: jmp _X86RETURN
+
+
+.L3:  // clear edx
+    xorl %edx,%edx
+
+.L4:  // head
+    movl %edi,%eax
+    andl $0b11,%eax
+    jz .L5
+    movb (%esi),%dl
+    movl ConvertX86p16_32RGB888_LUT_X86(,%edx,8),%eax     // eax = ARGB8888 of lower byte
+    movb 1(%esi),%dl
+    movl ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ebx   // ebx = ARGB8888 of upper byte
+    addl %ebx,%eax                                        // eax = ARGB8888 pixel
+    movb %al,(%edi)
+    movb %ah,1(%edi)
+    shrl $16,%eax
+    movb %al,2(%edi)
+    addl $2,%esi
+    addl $3,%edi
+    decl %ecx
+    jmp .L4
+
+.L5:  // unroll 4 times
+    pushl %ebp
+    movl %ecx,%ebp
+    shrl $2,%ebp
+
+    // clear ebx
+    xorl %ebx,%ebx
+
+    // save count
+    pushl %ecx
+
+    // prestep
+    movb (%esi),%bl                                     // ebx = lower byte pixel 1
+    movb 1(%esi),%dl                                    // edx = upper byte pixel 1
+
+.L6:    movl ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%eax   // eax = ARGB8888 of lower byte pixel 1
+        movb 2(%esi),%bl                                    // ebx = lower byte pixel 2
+
+        movl ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ecx    // ecx = ARGB8888 of upper byte pixel 1
+        movb 3(%esi),%dl                                    // edx = upper byte pixel 2
+
+        pushl %ebp                                          // save ebp
+        addl %ecx,%eax                                      // eax = ARGB8888 of pixel 1
+
+        movl ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%ebp      // ebp = ARGB8888 of lower byte pixel 2
+        movl ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ecx    // ecx = ARGB8888 of upper byte pixel 2
+
+        movb 4(%esi),%bl                                    // ebx = lower byte pixel 3
+        addl %ebp,%ecx                                      // ecx = ARGB8888 of pixel 2
+
+        shll $24,%ebp                                       // ebp = [b][0][0][0] of pixel 2
+        movb 5(%esi),%dl                                    // edx = upper byte pixel 3
+
+        shrl $8,%ecx                                        // ecx = [0][0][r][g] pixel 2
+        addl %ebp,%eax                                      // eax = [b2][r1][g1][b1] (done)
+
+        movl %eax,(%edi)                                    // store dword 1
+        movl ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%eax    // eax = ARGB8888 of upper byte pixel 3
+
+        movl ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%ebp      // ebp = ARGB8888 of lower byte pixel 3
+        movb 6(%esi),%bl                                    // ebx = lower byte pixel 4
+
+        addl %eax,%ebp                                      // ebp = ARGB8888 of pixel 3
+        movb 7(%esi),%dl                                    // edx = upper byte pixel 4
+
+        shll $16,%ebp                                       // ebp = [g][b][0][0] pixel 3
+
+        shrl $16,%eax                                       //  al = red component of pixel 3
+        addl %ecx,%ebp                                      // ebp = [g3][b3][r2][g2] (done)
+
+        movl %ebp,4(%edi)                                   // store dword 2
+        movl ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%ecx      // ecx = ARGB8888 of lower byte pixel 4
+
+        movl ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ebp    // ebp = ARGB8888 of upper byte pixel 4
+        movb 4*2+0(%esi),%bl                                // ebx = lower byte pixel 1 of next group
+
+        addl %ebp,%ecx                                      // ecx = ARGB8888 of pixel 4
+        movb 4*2+1(%esi),%dl                                // edx = upper byte pixel 1 of next group
+
+        shll $8,%ecx                                        // ecx = [r][g][b][0]
+        popl %ebp                                           // restore ebp
+
+        movb %al,%cl                                        // ecx = [r4][g4][b4][r3] (done)
+        addl $4*2,%esi
+
+        movl %ecx,8(%edi)                                   // store dword 3
+        addl $3*4,%edi
+
+        decl %ebp
+        jz .L7
+
+        jmp .L6
+
+.L7:  // check tail
+    popl %ecx
+    andl $0b11,%ecx
+    jz .L9
+
+.L8:  // tail
+    movb (%esi),%dl
+    movl ConvertX86p16_32RGB888_LUT_X86(,%edx,8),%eax    // eax = ARGB8888 of lower byte
+    movb 1(%esi),%dl
+    movl ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ebx  // ebx = ARGB8888 of upper byte
+    addl %ebx,%eax                                    // eax = ARGB8888 pixel
+    movb %al,(%edi)
+    movb %ah,1(%edi)
+    shrl $16,%eax
+    movb %al,2(%edi)
+    addl $2,%esi
+    addl $3,%edi
+    decl %ecx
+    jnz .L8
+
+.L9: popl %ebp
+    jmp _X86RETURN
+end;
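
The unrolled body above expands each 16-bit pixel through the same 32-bit lookup tables and then squeezes four 24-bit results into three aligned dword stores instead of twelve byte stores; the BGR variant below does the identical packing with swapped channel order. The shift choreography in the comments ([b2][r1][g1][b1] and so on) reduces to the following plain-Pascal sketch, where the names are illustrative and each pN is assumed to hold one expanded pixel with blue in the low byte:

procedure Pack4PixelsTo3Dwords(p1, p2, p3, p4: LongWord;
                               var d0, d1, d2: LongWord);
begin
  d0 := (p1 and $00FFFFFF) or (p2 shl 24);          { [b2][r1][g1][b1] }
  d1 := ((p2 shr 8) and $0000FFFF) or (p3 shl 16);  { [g3][b3][r2][g2] }
  d2 := ((p3 shr 16) and $000000FF) or (p4 shl 8);  { [r4][g4][b4][r3] }
end;
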
+
+
+
+
+procedure ConvertX86p16_24BGR888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+    // check short
+    cmpl $32,%ecx
+    ja .L3
+
+
+    // short loop
+    xorl %edx,%edx
+.L1: movb (%esi),%dl
+    movl ConvertX86p16_32BGR888_LUT_X86(,%edx,8),%eax    // eax = ABGR8888 of lower byte
+    movb 1(%esi),%dl
+    movl ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebx  // ebx = ABGR8888 of upper byte
+    addl %ebx,%eax                                    // eax = ABGR8888 pixel
+    movb %al,(%edi)
+    movb %ah,1(%edi)
+    shrl $16,%eax
+    movb %al,2(%edi)
+    addl $2,%esi
+    addl $3,%edi
+    decl %ecx
+    jnz .L1
+.L2:
+    jmp _X86RETURN
+
+
+.L3:  // clear edx
+    xorl %edx,%edx
+
+.L4:  // head
+    movl %edi,%eax
+    andl $0b11,%eax
+    jz .L5
+    movb (%esi),%dl
+    movl ConvertX86p16_32BGR888_LUT_X86(,%edx,8),%eax    // eax = ABGR8888 of lower byte
+    movb 1(%esi),%dl
+    movl ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebx  // ebx = ABGR8888 of upper byte
+    addl %ebx,%eax                                    // eax = ABGR8888 pixel
+    movb %al,(%edi)
+    movb %ah,1(%edi)
+    shrl $16,%eax
+    movb %al,2(%edi)
+    addl $2,%esi
+    addl $3,%edi
+    decl %ecx
+    jmp .L4
+
+.L5:  // unroll 4 times
+    pushl %ebp
+    movl %ecx,%ebp
+    shrl $2,%ebp
+
+    // clear ebx
+    xorl %ebx,%ebx
+
+    // save count
+    pushl %ecx
+
+    // prestep
+    movb (%esi),%bl                                     // ebx = lower byte pixel 1
+    movb 1(%esi),%dl                                    // edx = upper byte pixel 1
+
+.L6:    movl ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%eax // eax = ABGR8888 of lower byte pixel 1
+        movb 2(%esi),%bl                                    // ebx = lower byte pixel 2
+
+        movl ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ecx    // ecx = ABGR8888 of upper byte pixel 1
+        movb 3(%esi),%dl                                    // edx = upper byte pixel 2
+
+        pushl %ebp                                          // save ebp
+        addl %ecx,%eax                                      // eax = ABGR8888 of pixel 1
+
+        movl ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%ecx      // ecx = ABGR8888 of lower byte pixel 2
+        movl ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebp    // ebp = ABGR8888 of upper byte pixel 2
+
+        movb 4(%esi),%bl                                    // ebx = lower byte pixel 3
+        addl %ebp,%ecx                                      // ecx = ABGR8888 of pixel 2
+
+        shll $24,%ebp                                       // ebp = [r][0][0][0] of pixel 2
+        movb 5(%esi),%dl                                    // edx = upper byte pixel 3
+
+        shrl $8,%ecx                                        // ecx = [0][0][b][g] pixel 2
+        addl %ebp,%eax                                      // eax = [r2][b1][g1][r1] (done)
+
+        movl %eax,(%edi)                                    // store dword 1
+        movl ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebp    // ebp = ABGR8888 of upper byte pixel 3
+
+        movl ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%eax      // eax = ABGR8888 of lower byte pixel 3
+        movb 6(%esi),%bl                                    // ebx = lower byte pixel 4
+
+        addl %eax,%ebp                                      // ebp = ABGR8888 of pixel 3
+        movb 7(%esi),%dl                                    // edx = upper byte pixel 4
+
+        shll $16,%ebp                                       // ebp = [g][r][0][0] pixel 3
+
+        shrl $16,%eax                                       //  al = blue component of pixel 3
+        addl %ecx,%ebp                                      // ebp = [g3][r3][b2][g2] (done)
+
+        movl %ebp,4(%edi)                                   // store dword 2
+        movl ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%ecx      // ecx = ABGR8888 of lower byte pixel 4
+
+        movl ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebp    // ebp = ABGR8888 of upper byte pixel 4
+        movb 4*2+0(%esi),%bl                                // ebx = lower byte pixel 1 of next group
+
+        addl %ebp,%ecx                                      // ecx = ABGR8888 of pixel 4
+        movb 4*2+1(%esi),%dl                                // edx = upper byte pixel 1 of next group
+
+        shll $8,%ecx                                        // ecx = [b][g][r][0]
+        popl %ebp                                           // restore ebp
+
+        movb %al,%cl                                        // ecx = [b4][g4][r4][b3] (done)
+        addl $4*2,%esi
+
+        movl %ecx,8(%edi)                                   // store dword 3
+        addl $3*4,%edi
+
+        decl %ebp
+        jz .L7
+
+        jmp .L6
+
+.L7:  // check tail
+    popl %ecx
+    andl $0b11,%ecx
+    jz .L9
+
+.L8:  // tail
+    movb (%esi),%dl
+    movl ConvertX86p16_32BGR888_LUT_X86(,%edx,8),%eax    // eax = ABGR8888 of lower byte
+    movb 1(%esi),%dl
+    movl ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebx  // ebx = ABGR8888 of upper byte
+    addl %ebx,%eax                                    // eax = ABGR8888 pixel
+    movb %al,(%edi)
+    movb %ah,1(%edi)
+    shrl $16,%eax
+    movb %al,2(%edi)
+    addl $2,%esi
+    addl $3,%edi
+    decl %ecx
+    jnz .L8
+
+.L9: popl %ebp
+    jmp _X86RETURN
+end;
+
+
+
+procedure ConvertX86p16_16BGR565(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+    // check short
+    cmpl $16,%ecx
+    ja .L3
+
+
+.L1:  // short loop
+    movb (%esi),%al
+    movb 1(%esi),%ah
+    movl %eax,%ebx
+    movl %eax,%edx
+    shrl $11,%eax
+    andl $0b11111,%eax
+    andl $0b11111100000,%ebx
+    shll $11,%edx
+    addl %ebx,%eax
+    addl %edx,%eax
+    movb %al,(%edi)
+    movb %ah,1(%edi)
+    addl $2,%esi
+    addl $2,%edi
+    decl %ecx
+    jnz .L1
+.L2:
+    jmp _X86RETURN
+
+.L3:  // head
+    movl %edi,%eax
+    andl $0b11,%eax
+    jz .L4
+    movb (%esi),%al
+    movb 1(%esi),%ah
+    movl %eax,%ebx
+    movl %eax,%edx
+    shrl $11,%eax
+    andl $0b11111,%eax
+    andl $0b11111100000,%ebx
+    shll $11,%edx
+    addl %ebx,%eax
+    addl %edx,%eax
+    movb %al,(%edi)
+    movb %ah,1(%edi)
+    addl $2,%esi
+    addl $2,%edi
+    decl %ecx
+
+.L4:  // save count
+    pushl %ecx
+
+    // unroll twice
+    shrl $1,%ecx
+
+    // point arrays to end
+    leal (%esi,%ecx,4),%esi
+    leal (%edi,%ecx,4),%edi
+
+    // negative counter
+    negl %ecx
+    jmp .L6
+
+.L5:    movl %eax,-4(%edi,%ecx,4)
+.L6:    movl (%esi,%ecx,4),%eax
+
+        movl (%esi,%ecx,4),%ebx
+        andl $0x07E007E0,%eax
+
+        movl (%esi,%ecx,4),%edx
+        andl $0x0F800F800,%ebx
+
+        shrl $11,%ebx
+        andl $0x001F001F,%edx
+
+        shll $11,%edx
+        addl %ebx,%eax
+
+        addl %edx,%eax
+        incl %ecx
+
+        jnz .L5
+
+    movl %eax,-4(%edi,%ecx,4)
+
+    // tail
+    popl %ecx
+    andl $1,%ecx
+    jz .L7
+    movb (%esi),%al
+    movb 1(%esi),%ah
+    movl %eax,%ebx
+    movl %eax,%edx
+    shrl $11,%eax
+    andl $0b11111,%eax
+    andl $0b11111100000,%ebx
+    shll $11,%edx
+    addl %ebx,%eax
+    addl %edx,%eax
+    movb %al,(%edi)
+    movb %ah,1(%edi)
+    addl $2,%esi
+    addl $2,%edi
+
+.L7:
+    jmp _X86RETURN
+end;
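
ConvertX86p16_16BGR565 swaps the red and blue fields of each 565 pixel, and the unrolled loop does so two pixels at a time on a whole dword, which is why the masks appear doubled ($07E007E0, $F800F800, $001F001F). Per pixel the operation reduces to this plain-Pascal sketch:

function Swap565RedBlue(p: Word): Word;
begin
  Result := ((p shr 11) and $001F)     { red down into the blue slot }
         or (p and $07E0)              { green stays in place }
         or ((p and $001F) shl 11);    { blue up into the red slot }
end;
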
+
+
+
+
+procedure ConvertX86p16_16RGB555(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+    // check short
+    cmpl $32,%ecx
+    ja .L3
+
+
+.L1:  // short loop
+    movb (%esi),%al
+    movb 1(%esi),%ah
+    movl %eax,%ebx
+    shrl $1,%ebx
+    andl $0b0111111111100000,%ebx
+    andl $0b0000000000011111,%eax
+    addl %ebx,%eax
+    movb %al,(%edi)
+    movb %ah,1(%edi)
+    addl $2,%esi
+    addl $2,%edi
+    decl %ecx
+    jnz .L1
+.L2:
+    jmp _X86RETURN
+
+.L3:  // head
+    movl %edi,%eax
+    andl $0b11,%eax
+    jz .L4
+    movb (%esi),%al
+    movb 1(%esi),%ah
+    movl %eax,%ebx
+    shrl $1,%ebx
+    andl $0b0111111111100000,%ebx
+    andl $0b0000000000011111,%eax
+    addl %ebx,%eax
+    movb %al,(%edi)
+    movb %ah,1(%edi)
+    addl $2,%esi
+    addl $2,%edi
+    decl %ecx
+
+.L4:  // save ebp
+    pushl %ebp
+
+    // save count
+    pushl %ecx
+
+    // unroll four times
+    shrl $2,%ecx
+
+    // point arrays to end
+    leal (%esi,%ecx,8),%esi
+    leal (%edi,%ecx,8),%edi
+
+    // negative counter
+    xorl %ebp,%ebp
+    subl %ecx,%ebp
+
+.L5:    movl (%esi,%ebp,8),%eax // agi?
+        movl 4(%esi,%ebp,8),%ecx
+
+        movl %eax,%ebx
+        movl %ecx,%edx
+
+        andl $0x0FFC0FFC0,%eax
+        andl $0x0FFC0FFC0,%ecx
+
+        shrl $1,%eax
+        andl $0x001F001F,%ebx
+
+        shrl $1,%ecx
+        andl $0x001F001F,%edx
+
+        addl %ebx,%eax
+        addl %edx,%ecx
+
+        movl %eax,(%edi,%ebp,8)
+        movl %ecx,4(%edi,%ebp,8)
+
+        incl %ebp
+        jnz .L5
+
+    // tail
+    popl %ecx
+.L6: andl $0b11,%ecx
+    jz .L7
+    movb (%esi),%al
+    movb 1(%esi),%ah
+    movl %eax,%ebx
+    shrl $1,%ebx
+    andl $0b0111111111100000,%ebx
+    andl $0b0000000000011111,%eax
+    addl %ebx,%eax
+    movb %al,(%edi)
+    movb %ah,1(%edi)
+    addl $2,%esi
+    addl $2,%edi
+    decl %ecx
+    jmp .L6
+
+.L7: popl %ebp
+    jmp _X86RETURN
+end;
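
The 565-to-555 conversion above keeps blue where it is, slides red and green down one position, and silently drops the least significant green bit; the unrolled loop applies the same mask-and-shift to two pixels per dword ($0FFC0FFC0 and $001F001F). Per pixel, as a sketch:

function RGB565To555(p: Word): Word;
begin
  { red and green move down one bit; the low green bit is discarded }
  Result := ((p shr 1) and $7FE0) or (p and $001F);
end;
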
+
+
+
+
+
+procedure ConvertX86p16_16BGR555(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+    // check short
+    cmpl $16,%ecx
+    ja .L3
+
+
+.L1:  // short loop
+    movb (%esi),%al
+    movb 1(%esi),%ah
+    movl %eax,%ebx
+    movl %eax,%edx
+    shrl $11,%eax
+    andl $0b11111,%eax
+    shrl $1,%ebx
+    andl $0b1111100000,%ebx
+    shll $10,%edx
+    andl $0b0111110000000000,%edx
+    addl %ebx,%eax
+    addl %edx,%eax
+    movb %al,(%edi)
+    movb %ah,1(%edi)
+    addl $2,%esi
+    addl $2,%edi
+    decl %ecx
+    jnz .L1
+.L2:
+    jmp _X86RETURN
+
+.L3:  // head
+    movl %edi,%eax
+    andl $0b11,%eax
+    jz .L4
+    movb (%esi),%al
+    movb 1(%esi),%ah
+    movl %eax,%ebx
+    movl %eax,%edx
+    shrl $11,%eax
+    andl $0b11111,%eax
+    shrl $1,%ebx
+    andl $0b1111100000,%ebx
+    shll $10,%edx
+    andl $0b0111110000000000,%edx
+    addl %ebx,%eax
+    addl %edx,%eax
+    movb %al,(%edi)
+    movb %ah,1(%edi)
+    addl $2,%esi
+    addl $2,%edi
+    decl %ecx
+
+.L4:  // save count
+    pushl %ecx
+
+    // unroll twice
+    shrl $1,%ecx
+
+    // point arrays to end
+    leal (%esi,%ecx,4),%esi
+    leal (%edi,%ecx,4),%edi
+
+    // negative counter
+    negl %ecx
+    jmp .L6
+
+.L5:    movl %eax,-4(%edi,%ecx,4)
+.L6:    movl (%esi,%ecx,4),%eax
+
+        shrl $1,%eax
+        movl (%esi,%ecx,4),%ebx
+
+        andl $0x03E003E0,%eax
+        movl (%esi,%ecx,4),%edx
+
+        andl $0x0F800F800,%ebx
+
+        shrl $11,%ebx
+        andl $0x001F001F,%edx
+
+        shll $10,%edx
+        addl %ebx,%eax
+
+        addl %edx,%eax
+        incl %ecx
+
+        jnz .L5
+
+    movl %eax,-4(%edi,%ecx,4)
+
+    // tail
+    popl %ecx
+    andl $1,%ecx
+    jz .L7
+    movb (%esi),%al
+    movb 1(%esi),%ah
+    movl %eax,%ebx
+    movl %eax,%edx
+    shrl $11,%eax
+    andl $0b11111,%eax
+    shrl $1,%ebx
+    andl $0b1111100000,%ebx
+    shll $10,%edx
+    andl $0b0111110000000000,%edx
+    addl %ebx,%eax
+    addl %edx,%eax
+    movb %al,(%edi)
+    movb %ah,1(%edi)
+    addl $2,%esi
+    addl $2,%edi
+
+.L7:
+    jmp _X86RETURN
+end;
+
+
+
+
+
+procedure ConvertX86p16_8RGB332(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+    // check short
+    cmpl $16,%ecx
+    ja .L3
+
+
+.L1:  // short loop
+    movb (%esi),%al
+    movb 1(%esi),%ah
+    movl %eax,%ebx
+    movl %eax,%edx
+    andl $0b11000,%eax          // blue
+    shrl $3,%eax
+    andl $0b11100000000,%ebx    // green
+    shrl $6,%ebx
+    andl $0b1110000000000000,%edx // red
+    shrl $8,%edx
+    addl %ebx,%eax
+    addl %edx,%eax
+    movb %al,(%edi)
+    addl $2,%esi
+    incl %edi
+    decl %ecx
+    jnz .L1
+.L2:
+    jmp _X86RETURN
+
+.L3: movl %edi,%eax
+    andl $0b11,%eax
+    jz .L4
+    movb (%esi),%al
+    movb 1(%esi),%ah
+    movl %eax,%ebx
+    movl %eax,%edx
+    andl $0b11000,%eax          // blue
+    shrl $3,%eax
+    andl $0b11100000000,%ebx    // green
+    shrl $6,%ebx
+    andl $0b1110000000000000,%edx // red
+    shrl $8,%edx
+    addl %ebx,%eax
+    addl %edx,%eax
+    movb %al,(%edi)
+    addl $2,%esi
+    incl %edi
+    decl %ecx
+    jmp .L3
+
+.L4:  // save ebp
+    pushl %ebp
+
+    // save count
+    pushl %ecx
+
+    // unroll 4 times
+    shrl $2,%ecx
+
+    // prestep
+    movb (%esi),%dl
+    movb 1(%esi),%bl
+    movb 2(%esi),%dh
+
+.L5: shll $16,%edx
+        movb 3(%esi),%bh
+
+        shll $16,%ebx
+        movb 4(%esi),%dl
+
+        movb 6(%esi),%dh
+        movb 5(%esi),%bl
+
+        andl $0b00011000000110000001100000011000,%edx
+        movb 7(%esi),%bh
+
+        rorl $16+3,%edx
+        movl %ebx,%eax                                  // setup eax for reds
+
+        andl $0b00000111000001110000011100000111,%ebx
+        andl $0b11100000111000001110000011100000,%eax   // reds
+
+        rorl $16-2,%ebx
+        addl $8,%esi
+
+        rorl $16,%eax
+        addl $4,%edi
+
+        addl %ebx,%eax
+        movb 1(%esi),%bl                                // greens
+
+        addl %edx,%eax
+        movb (%esi),%dl                                 // blues
+
+        movl %eax,-4(%edi)
+        movb 2(%esi),%dh
+
+        decl %ecx
+        jnz .L5
+
+    // check tail
+    popl %ecx
+    andl $0b11,%ecx
+    jz .L7
+
+.L6:  // tail
+    movb (%esi),%al
+    movb 1(%esi),%ah
+    movl %eax,%ebx
+    movl %eax,%edx
+    andl $0b11000,%eax          // blue
+    shrl $3,%eax
+    andl $0b11100000000,%ebx    // green
+    shrl $6,%ebx
+    andl $0b1110000000000000,%edx // red
+    shrl $8,%edx
+    addl %ebx,%eax
+    addl %edx,%eax
+    movb %al,(%edi)
+    addl $2,%esi
+    incl %edi
+    decl %ecx
+    jnz .L6
+
+.L7: popl %ebp
+    jmp _X86RETURN
+end;
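
RGB332 keeps only the top 3 red bits, top 3 green bits and top 2 blue bits of each 565 pixel, with red in the most significant bits of the output byte; the unrolled loop gathers four pixels into two registers and applies the 32-bit masks and rotates shown above. Per pixel the quantisation is simply this, sketched in plain Pascal:

function RGB565To332(p: Word): Byte;
begin
  Result := (((p shr 13) and $07) shl 5)   { top 3 red bits   -> bits 5..7 }
         or (((p shr 8) and $07) shl 2)    { top 3 green bits -> bits 2..4 }
         or ((p shr 3) and $03);           { top 2 blue bits  -> bits 0..1 }
end;
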

+ 0 - 1043
packages/hermes/src/i386/x86p_32.as

@@ -1,1043 +0,0 @@
-#
-# x86 format converters for HERMES
-# Some routines Copyright (c) 1998 Christian Nentwich ([email protected])
-# This source code is licensed under the GNU LGPL
-#
-# Please refer to the file COPYING.LIB contained in the distribution for
-# licensing conditions
-#
-# Most routines are (c) Glenn Fiedler ([email protected]), used with permission
-#
-
-
-
-.globl _ConvertX86p32_32BGR888
-.globl _ConvertX86p32_32RGBA888
-.globl _ConvertX86p32_32BGRA888
-.globl _ConvertX86p32_24RGB888
-.globl _ConvertX86p32_24BGR888
-.globl _ConvertX86p32_16RGB565
-.globl _ConvertX86p32_16BGR565
-.globl _ConvertX86p32_16RGB555
-.globl _ConvertX86p32_16BGR555
-.globl _ConvertX86p32_8RGB332
-
-.extern _x86return
-
-.text
-
-
-## _Convert_*
-## Parameters:
-##   ESI = source
-##   EDI = dest
-##   ECX = amount of pixels (never 0; the _ConvertX86 routine checks for that)
-## Destroys:
-##   EAX, EBX, EDX
-
-
-_ConvertX86p32_32BGR888:
-
-    # check short
-    cmpl $32,%ecx
-    ja _ConvertX86p32_32BGR888.L3
-
-_ConvertX86p32_32BGR888.L1:  # short loop
-    movl (%esi),%edx
-    bswapl %edx
-    rorl $8,%edx
-    movl %edx,(%edi)
-    addl $4,%esi
-    addl $4,%edi
-    decl %ecx
-    jnz _ConvertX86p32_32BGR888.L1
-_ConvertX86p32_32BGR888.L2:
-    jmp _x86return
-
-_ConvertX86p32_32BGR888.L3:  # save ebp
-    pushl %ebp
-
-    # unroll four times
-    movl %ecx,%ebp
-    shrl $2,%ebp
-
-    # save count
-    pushl %ecx
-
-_ConvertX86p32_32BGR888.L4: movl (%esi),%eax
-        movl 4(%esi),%ebx
-
-        bswapl %eax
-
-        bswapl %ebx
-
-        rorl $8,%eax
-        movl 8(%esi),%ecx
-
-        rorl $8,%ebx
-        movl 12(%esi),%edx
-
-        bswapl %ecx
-
-        bswapl %edx
-
-        rorl $8,%ecx
-        movl %eax,(%edi)
-
-        rorl $8,%edx
-        movl %ebx,4(%edi)
-
-        movl %ecx,8(%edi)
-        movl %edx,12(%edi)
-
-        addl $16,%esi
-        addl $16,%edi
-
-        decl %ebp
-        jnz _ConvertX86p32_32BGR888.L4
-
-    # check tail
-    popl %ecx
-    andl $0b11,%ecx
-    jz _ConvertX86p32_32BGR888.L6
-
-_ConvertX86p32_32BGR888.L5:  # tail loop
-    movl (%esi),%edx
-    bswapl %edx
-    rorl $8,%edx
-    movl %edx,(%edi)
-    addl $4,%esi
-    addl $4,%edi
-    decl %ecx
-    jnz _ConvertX86p32_32BGR888.L5
-
-_ConvertX86p32_32BGR888.L6: popl %ebp
-    jmp _x86return
-
-
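
The routine above turns 32-bit $AARRGGBB into $AABBGGRR with only two instructions per pixel: bswap reverses the four bytes ($AARRGGBB becomes $BBGGRRAA) and a rotate right by 8 brings alpha back to the top byte. A plain-Pascal sketch of the same shuffle, assuming SwapEndian from the FPC RTL is available and spelling the rotate with shifts:

function ARGBToABGR(p: LongWord): LongWord;
var
  t: LongWord;
begin
  t := SwapEndian(p);                  { bswap: $AARRGGBB -> $BBGGRRAA }
  Result := (t shr 8) or (t shl 24);   { ror 8: $BBGGRRAA -> $AABBGGRR }
end;
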
-
-
-_ConvertX86p32_32RGBA888:
-
-    # check short
-    cmpl $32,%ecx
-    ja _ConvertX86p32_32RGBA888.L3
-
-_ConvertX86p32_32RGBA888.L1:  # short loop
-    movl (%esi),%edx
-    roll $8,%edx
-    movl %edx,(%edi)
-    addl $4,%esi
-    addl $4,%edi
-    decl %ecx
-    jnz _ConvertX86p32_32RGBA888.L1
-_ConvertX86p32_32RGBA888.L2:
-    jmp _x86return
-
-_ConvertX86p32_32RGBA888.L3:  # save ebp
-    pushl %ebp
-
-    # unroll four times
-    movl %ecx,%ebp
-    shrl $2,%ebp
-
-    # save count
-    pushl %ecx
-
-_ConvertX86p32_32RGBA888.L4: movl (%esi),%eax
-        movl 4(%esi),%ebx
-
-        roll $8,%eax
-        movl 8(%esi),%ecx
-
-        roll $8,%ebx
-        movl 12(%esi),%edx
-
-        roll $8,%ecx
-        movl %eax,(%edi)
-
-        roll $8,%edx
-        movl %ebx,4(%edi)
-
-        movl %ecx,8(%edi)
-        movl %edx,12(%edi)
-
-        addl $16,%esi
-        addl $16,%edi
-
-        decl %ebp
-        jnz _ConvertX86p32_32RGBA888.L4
-
-    # check tail
-    popl %ecx
-    andl $0b11,%ecx
-    jz _ConvertX86p32_32RGBA888.L6
-
-_ConvertX86p32_32RGBA888.L5:  # tail loop
-    movl (%esi),%edx
-    roll $8,%edx
-    movl %edx,(%edi)
-    addl $4,%esi
-    addl $4,%edi
-    decl %ecx
-    jnz _ConvertX86p32_32RGBA888.L5
-
-_ConvertX86p32_32RGBA888.L6: popl %ebp
-    jmp _x86return
-
-
-
-
-_ConvertX86p32_32BGRA888:
-
-    # check short
-    cmpl $32,%ecx
-    ja _ConvertX86p32_32BGRA888.L3
-
-_ConvertX86p32_32BGRA888.L1:  # short loop
-    movl (%esi),%edx
-    bswapl %edx
-    movl %edx,(%edi)
-    addl $4,%esi
-    addl $4,%edi
-    decl %ecx
-    jnz _ConvertX86p32_32BGRA888.L1
-_ConvertX86p32_32BGRA888.L2:
-    jmp _x86return
-
-_ConvertX86p32_32BGRA888.L3:  # save ebp
-    pushl %ebp
-
-    # unroll four times
-    movl %ecx,%ebp
-    shrl $2,%ebp
-
-    # save count
-    pushl %ecx
-
-_ConvertX86p32_32BGRA888.L4: movl (%esi),%eax
-        movl 4(%esi),%ebx
-
-        movl 8(%esi),%ecx
-        movl 12(%esi),%edx
-
-        bswapl %eax
-
-        bswapl %ebx
-
-        bswapl %ecx
-
-        bswapl %edx
-
-        movl %eax,(%edi)
-        movl %ebx,4(%edi)
-
-        movl %ecx,8(%edi)
-        movl %edx,12(%edi)
-
-        addl $16,%esi
-        addl $16,%edi
-
-        decl %ebp
-        jnz _ConvertX86p32_32BGRA888.L4
-
-    # check tail
-    popl %ecx
-    andl $0b11,%ecx
-    jz _ConvertX86p32_32BGRA888.L6
-
-_ConvertX86p32_32BGRA888.L5:  # tail loop
-    movl (%esi),%edx
-    bswapl %edx
-    movl %edx,(%edi)
-    addl $4,%esi
-    addl $4,%edi
-    decl %ecx
-    jnz _ConvertX86p32_32BGRA888.L5
-
-_ConvertX86p32_32BGRA888.L6: popl %ebp
-    jmp _x86return
-
-
-
-
-## 32 bit RGB 888 to 24 bit RGB 888
-
-_ConvertX86p32_24RGB888:
-
-        # check short
-        cmpl $32,%ecx
-        ja _ConvertX86p32_24RGB888.L3
-
-_ConvertX86p32_24RGB888.L1:  # short loop
-        movb (%esi),%al
-        movb 1(%esi),%bl
-        movb 2(%esi),%dl
-        movb %al,(%edi)
-        movb %bl,1(%edi)
-        movb %dl,2(%edi)
-        addl $4,%esi
-        addl $3,%edi
-        decl %ecx
-        jnz _ConvertX86p32_24RGB888.L1
-_ConvertX86p32_24RGB888.L2:
-        jmp _x86return
-
-_ConvertX86p32_24RGB888.L3:  #        head
-        movl %edi,%edx
-        andl $0b11,%edx
-        jz _ConvertX86p32_24RGB888.L4
-        movb (%esi),%al
-        movb 1(%esi),%bl
-        movb 2(%esi),%dl
-        movb %al,(%edi)
-        movb %bl,1(%edi)
-        movb %dl,2(%edi)
-        addl $4,%esi
-        addl $3,%edi
-        decl %ecx
-        jmp _ConvertX86p32_24RGB888.L3
-
-_ConvertX86p32_24RGB888.L4:  # unroll 4 times
-        pushl %ebp
-        movl %ecx,%ebp
-        shrl $2,%ebp
-
-    # save count
-        pushl %ecx
-
-_ConvertX86p32_24RGB888.L5: movl (%esi),%eax # first dword            eax = [A][R][G][B]
-        movl 4(%esi),%ebx               # second dword           ebx = [a][r][g][b]
-
-        shll $8,%eax                    #                        eax = [R][G][B][.]
-        movl 12(%esi),%ecx              # third dword            ecx = [a][r][g][b]
-
-        shll $8,%ebx                    #                        ebx = [r][g][b][.]
-        movb 4(%esi),%al                #                        eax = [R][G][B][b]
-
-        rorl $8,%eax                    #                        eax = [b][R][G][B] (done)
-        movb 8+1(%esi),%bh              #                        ebx = [r][g][G][.]
-
-        movl %eax,(%edi)
-        addl $3*4,%edi
-
-        shll $8,%ecx                    #                        ecx = [r][g][b][.]
-        movb 8+0(%esi),%bl              #                        ebx = [r][g][G][B]
-
-        roll $16,%ebx                   #                        ebx = [G][B][r][g] (done)
-        movb 8+2(%esi),%cl              #                        ecx = [r][g][b][R] (done)
-
-        movl %ebx,4-3*4(%edi)
-        addl $4*4,%esi
-
-        movl %ecx,8-3*4(%edi)
-        decl %ebp
-
-        jnz _ConvertX86p32_24RGB888.L5
-
-    # check tail
-        popl %ecx
-        andl $0b11,%ecx
-        jz _ConvertX86p32_24RGB888.L7
-
-_ConvertX86p32_24RGB888.L6:  # tail loop
-        movb (%esi),%al
-        movb 1(%esi),%bl
-        movb 2(%esi),%dl
-        movb %al,(%edi)
-        movb %bl,1(%edi)
-        movb %dl,2(%edi)
-        addl $4,%esi
-        addl $3,%edi
-        decl %ecx
-        jnz _ConvertX86p32_24RGB888.L6
-
-_ConvertX86p32_24RGB888.L7: popl %ebp
-        jmp _x86return
-
-
-
-
-## 32 bit RGB 888 to 24 bit BGR 888
-
-_ConvertX86p32_24BGR888:
-
-        # check short
-        cmpl $32,%ecx
-        ja _ConvertX86p32_24BGR888.L3
-
-
-_ConvertX86p32_24BGR888.L1:  # short loop
-        movb (%esi),%dl
-        movb 1(%esi),%bl
-        movb 2(%esi),%al
-        movb %al,(%edi)
-        movb %bl,1(%edi)
-        movb %dl,2(%edi)
-        addl $4,%esi
-        addl $3,%edi
-        decl %ecx
-        jnz _ConvertX86p32_24BGR888.L1
-_ConvertX86p32_24BGR888.L2:
-        jmp _x86return
-
-_ConvertX86p32_24BGR888.L3:  # head
-        movl %edi,%edx
-        andl $0b11,%edx
-        jz _ConvertX86p32_24BGR888.L4
-        movb (%esi),%dl
-        movb 1(%esi),%bl
-        movb 2(%esi),%al
-        movb %al,(%edi)
-        movb %bl,1(%edi)
-        movb %dl,2(%edi)
-        addl $4,%esi
-        addl $3,%edi
-        decl %ecx
-        jmp _ConvertX86p32_24BGR888.L3
-
-_ConvertX86p32_24BGR888.L4:  # unroll 4 times
-        pushl %ebp
-        movl %ecx,%ebp
-        shrl $2,%ebp
-
-        # save count
-        pushl %ecx
-
-_ConvertX86p32_24BGR888.L5:
-        movl (%esi),%eax                # first dword            eax = [A][R][G][B]
-        movl 4(%esi),%ebx               # second dword           ebx = [a][r][g][b]
-
-        bswapl %eax                     #                        eax = [B][G][R][A]
-
-        bswapl %ebx                     #                        ebx = [b][g][r][a]
-
-        movb 4+2(%esi),%al              #                        eax = [B][G][R][r]
-        movb 4+4+1(%esi),%bh            #                        ebx = [b][g][G][a]
-
-        rorl $8,%eax                    #                        eax = [r][B][G][R] (done)
-        movb 4+4+2(%esi),%bl            #                        ebx = [b][g][G][R]
-
-        rorl $16,%ebx                   #                        ebx = [G][R][b][g] (done)
-        movl %eax,(%edi)
-
-        movl %ebx,4(%edi)
-        movl 12(%esi),%ecx              # third dword            ecx = [a][r][g][b]
-
-        bswapl %ecx                     #                        ecx = [b][g][r][a]
-
-        movb 8(%esi),%cl                #                        ecx = [b][g][r][B] (done)
-        addl $4*4,%esi
-
-        movl %ecx,8(%edi)
-        addl $3*4,%edi
-
-        decl %ebp
-        jnz _ConvertX86p32_24BGR888.L5
-
-        # check tail
-        popl %ecx
-        andl $0b11,%ecx
-        jz _ConvertX86p32_24BGR888.L7
-
-_ConvertX86p32_24BGR888.L6:  # tail loop
-        movb (%esi),%dl
-        movb 1(%esi),%bl
-        movb 2(%esi),%al
-        movb %al,(%edi)
-        movb %bl,1(%edi)
-        movb %dl,2(%edi)
-        addl $4,%esi
-        addl $3,%edi
-        decl %ecx
-        jnz _ConvertX86p32_24BGR888.L6
-
-_ConvertX86p32_24BGR888.L7:
-        popl %ebp
-        jmp _x86return
-
-
-
-
-## 32 bit RGB 888 to 16 bit RGB 565
-.align 8
-_ConvertX86p32_16RGB565:
-        # check short
-        cmpl $16,%ecx
-        ja _ConvertX86p32_16RGB565.L3
-
-_ConvertX86p32_16RGB565.L1:  # short loop
-        movb (%esi),%bl   # blue
-        movb 1(%esi),%al  # green
-        movb 2(%esi),%ah  # red
-        shrb $3,%ah
-        andb $0b11111100,%al
-        shll $3,%eax
-        shrb $3,%bl
-        addb %bl,%al
-        movb %al,(%edi)
-        movb %ah,1(%edi)
-        addl $4,%esi
-        addl $2,%edi
-        decl %ecx
-        jnz _ConvertX86p32_16RGB565.L1
-
-_ConvertX86p32_16RGB565.L2:     # End of short loop
-        jmp _x86return
-
-
-_ConvertX86p32_16RGB565.L3:  # head
-        movl %edi,%ebx
-        andl $0b11,%ebx
-        jz _ConvertX86p32_16RGB565.L4
-
-        movb (%esi),%bl   # blue
-        movb 1(%esi),%al  # green
-        movb 2(%esi),%ah  # red
-        shrb $3,%ah
-        andb $0b11111100,%al
-        shll $3,%eax
-        shrb $3,%bl
-        addb %bl,%al
-        movb %al,(%edi)
-        movb %ah,1(%edi)
-        addl $4,%esi
-        addl $2,%edi
-        decl %ecx
-
-_ConvertX86p32_16RGB565.L4:
-    # save count
-        pushl %ecx
-
-    # unroll twice
-        shrl %ecx
-
-    # point arrays to end
-        leal (%esi,%ecx,8),%esi
-        leal (%edi,%ecx,4),%edi
-
-    # negative counter
-        negl %ecx
-        jmp _ConvertX86p32_16RGB565.L6
-
-_ConvertX86p32_16RGB565.L5:
-        movl %eax,-4(%edi,%ecx,4)
-.align 8
-_ConvertX86p32_16RGB565.L6:
-        movl (%esi,%ecx,8),%eax
-
-        shrb $2,%ah
-        movl 4(%esi,%ecx,8),%ebx
-
-        shrl $3,%eax
-        movl 4(%esi,%ecx,8),%edx
-
-        shrb $2,%bh
-        movb 2(%esi,%ecx,8),%dl
-
-        shll $13,%ebx
-        andl $0x000007FF,%eax
-
-        shll $8,%edx
-        andl $0x07FF0000,%ebx
-
-        andl $0x0F800F800,%edx
-        addl %ebx,%eax
-
-        addl %edx,%eax
-        incl %ecx
-
-        jnz _ConvertX86p32_16RGB565.L5
-
-        movl %eax,-4(%edi,%ecx,4)
-
-    # tail
-        popl %ecx
-        testb $1,%cl
-        jz _ConvertX86p32_16RGB565.L7
-
-        movb (%esi),%bl   # blue
-        movb 1(%esi),%al  # green
-        movb 2(%esi),%ah  # red
-        shrb $3,%ah
-        andb $0b11111100,%al
-        shll $3,%eax
-        shrb $3,%bl
-        addb %bl,%al
-        movb %al,(%edi)
-        movb %ah,1(%edi)
-        addl $4,%esi
-        addl $2,%edi
-
-_ConvertX86p32_16RGB565.L7:
-        jmp _x86return
-
-
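
_ConvertX86p32_16RGB565 truncates each 8-bit channel of the $00RRGGBB source down to the 5-6-5 field widths; the unrolled body assembles two 16-bit results into one dword before storing. Per pixel the reduction is the usual shift-and-mask, sketched here in plain Pascal:

function RGB888To565(p: LongWord): Word;
begin
  Result := ((p shr 8) and $F800)     { top 5 red bits   -> bits 11..15 }
         or ((p shr 5) and $07E0)     { top 6 green bits -> bits 5..10 }
         or ((p shr 3) and $001F);    { top 5 blue bits  -> bits 0..4 }
end;
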
-
-
-## 32 bit RGB 888 to 16 bit BGR 565
-
-_ConvertX86p32_16BGR565:
-
-        # check short
-        cmpl $16,%ecx
-        ja _ConvertX86p32_16BGR565.L3
-
-_ConvertX86p32_16BGR565.L1:  # short loop
-        movb (%esi),%ah   # blue
-        movb 1(%esi),%al  # green
-        movb 2(%esi),%bl  # red
-        shrb $3,%ah
-        andb $0b11111100,%al
-        shll $3,%eax
-        shrb $3,%bl
-        addb %bl,%al
-        movb %al,(%edi)
-        movb %ah,1(%edi)
-        addl $4,%esi
-        addl $2,%edi
-        decl %ecx
-        jnz _ConvertX86p32_16BGR565.L1
-_ConvertX86p32_16BGR565.L2:
-        jmp _x86return
-
-_ConvertX86p32_16BGR565.L3:  # head
-        movl %edi,%ebx
-        andl $0b11,%ebx
-        jz _ConvertX86p32_16BGR565.L4
-        movb (%esi),%ah   # blue
-        movb 1(%esi),%al  # green
-        movb 2(%esi),%bl  # red
-        shrb $3,%ah
-        andb $0b11111100,%al
-        shll $3,%eax
-        shrb $3,%bl
-        addb %bl,%al
-        movb %al,(%edi)
-        movb %ah,1(%edi)
-        addl $4,%esi
-        addl $2,%edi
-        decl %ecx
-
-_ConvertX86p32_16BGR565.L4:  # save count
-        pushl %ecx
-
-        # unroll twice
-        shrl %ecx
-
-        # point arrays to end
-        leal (%esi,%ecx,8),%esi
-        leal (%edi,%ecx,4),%edi
-
-        # negative count
-        negl %ecx
-        jmp _ConvertX86p32_16BGR565.L6
-
-_ConvertX86p32_16BGR565.L5:
-        movl %eax,-4(%edi,%ecx,4)
-_ConvertX86p32_16BGR565.L6:
-        movl 4(%esi,%ecx,8),%edx
-
-        movb 4(%esi,%ecx,8),%bh
-        movb (%esi,%ecx,8),%ah
-
-        shrb $3,%bh
-        movb 1(%esi,%ecx,8),%al
-
-        shrb $3,%ah
-        movb 5(%esi,%ecx,8),%bl
-
-        shll $3,%eax
-        movb 2(%esi,%ecx,8),%dl
-
-        shll $19,%ebx
-        andl $0x0000FFE0,%eax
-
-        shrl $3,%edx
-        andl $0x0FFE00000,%ebx
-
-        andl $0x001F001F,%edx
-        addl %ebx,%eax
-
-        addl %edx,%eax
-        incl %ecx
-
-        jnz _ConvertX86p32_16BGR565.L5
-
-        movl %eax,-4(%edi,%ecx,4)
-
-        # tail
-        popl %ecx
-        andl $1,%ecx
-        jz _ConvertX86p32_16BGR565.L7
-        movb (%esi),%ah   # blue
-        movb 1(%esi),%al  # green
-        movb 2(%esi),%bl  # red
-        shrb $3,%ah
-        andb $0b11111100,%al
-        shll $3,%eax
-        shrb $3,%bl
-        addb %bl,%al
-        movb %al,(%edi)
-        movb %ah,1(%edi)
-        addl $4,%esi
-        addl $2,%edi
-
-_ConvertX86p32_16BGR565.L7:
-        jmp _x86return
-
-
-
-
-## 32 BIT RGB TO 16 BIT RGB 555
-
-_ConvertX86p32_16RGB555:
-
-        # check short
-        cmpl $16,%ecx
-        ja _ConvertX86p32_16RGB555.L3
-
-_ConvertX86p32_16RGB555.L1:  # short loop
-        movb (%esi),%bl   # blue
-        movb 1(%esi),%al  # green
-        movb 2(%esi),%ah  # red
-        shrb $3,%ah
-        andb $0b11111000,%al
-        shll $2,%eax
-        shrb $3,%bl
-        addb %bl,%al
-        movb %al,(%edi)
-        movb %ah,1(%edi)
-        addl $4,%esi
-        addl $2,%edi
-        decl %ecx
-        jnz _ConvertX86p32_16RGB555.L1
-_ConvertX86p32_16RGB555.L2:
-        jmp _x86return
-
-_ConvertX86p32_16RGB555.L3:  # head
-        movl %edi,%ebx
-        andl $0b11,%ebx
-        jz _ConvertX86p32_16RGB555.L4
-        movb (%esi),%bl   # blue
-        movb 1(%esi),%al  # green
-        movb 2(%esi),%ah  # red
-        shrb $3,%ah
-        andb $0b11111000,%al
-        shll $2,%eax
-        shrb $3,%bl
-        addb %bl,%al
-        movb %al,(%edi)
-        movb %ah,1(%edi)
-        addl $4,%esi
-        addl $2,%edi
-        decl %ecx
-
-_ConvertX86p32_16RGB555.L4:  # save count
-        pushl %ecx
-
-        # unroll twice
-        shrl %ecx
-
-        # point arrays to end
-        leal (%esi,%ecx,8),%esi
-        leal (%edi,%ecx,4),%edi
-
-        # negative counter
-        negl %ecx
-        jmp _ConvertX86p32_16RGB555.L6
-
-_ConvertX86p32_16RGB555.L5:
-        movl %eax,-4(%edi,%ecx,4)
-_ConvertX86p32_16RGB555.L6:
-        movl (%esi,%ecx,8),%eax
-
-        shrb $3,%ah
-        movl 4(%esi,%ecx,8),%ebx
-
-        shrl $3,%eax
-        movl 4(%esi,%ecx,8),%edx
-
-        shrb $3,%bh
-        movb 2(%esi,%ecx,8),%dl
-
-        shll $13,%ebx
-        andl $0x000007FF,%eax
-
-        shll $7,%edx
-        andl $0x07FF0000,%ebx
-
-        andl $0x07C007C00,%edx
-        addl %ebx,%eax
-
-        addl %edx,%eax
-        incl %ecx
-
-        jnz _ConvertX86p32_16RGB555.L5
-
-        movl %eax,-4(%edi,%ecx,4)
-
-        # tail
-        popl %ecx
-        andl $1,%ecx
-        jz _ConvertX86p32_16RGB555.L7
-        movb (%esi),%bl   # blue
-        movb 1(%esi),%al  # green
-        movb 2(%esi),%ah  # red
-        shrb $3,%ah
-        andb $0b11111000,%al
-        shll $2,%eax
-        shrb $3,%bl
-        addb %bl,%al
-        movb %al,(%edi)
-        movb %ah,1(%edi)
-        addl $4,%esi
-        addl $2,%edi
-
-_ConvertX86p32_16RGB555.L7:
-        jmp _x86return
-
-
-
-
-## 32 BIT RGB TO 16 BIT BGR 555
-
-_ConvertX86p32_16BGR555:
-
-        # check short
-        cmpl $16,%ecx
-        ja _ConvertX86p32_16BGR555.L3
-
-
-_ConvertX86p32_16BGR555.L1:  # short loop
-        movb (%esi),%ah   # blue
-        movb 1(%esi),%al  # green
-        movb 2(%esi),%bl  # red
-        shrb $3,%ah
-        andb $0b11111000,%al
-        shll $2,%eax
-        shrb $3,%bl
-        addb %bl,%al
-        movb %al,(%edi)
-        movb %ah,1(%edi)
-        addl $4,%esi
-        addl $2,%edi
-        decl %ecx
-        jnz _ConvertX86p32_16BGR555.L1
-_ConvertX86p32_16BGR555.L2:
-        jmp _x86return
-
-_ConvertX86p32_16BGR555.L3:  # head
-        movl %edi,%ebx
-        andl $0b11,%ebx
-        jz _ConvertX86p32_16BGR555.L4
-        movb (%esi),%ah   # blue
-        movb 1(%esi),%al  # green
-        movb 2(%esi),%bl  # red
-        shrb $3,%ah
-        andb $0b11111000,%al
-        shll $2,%eax
-        shrb $3,%bl
-        addb %bl,%al
-        movb %al,(%edi)
-        movb %ah,1(%edi)
-        addl $4,%esi
-        addl $2,%edi
-        decl %ecx
-
-_ConvertX86p32_16BGR555.L4:  # save count
-        pushl %ecx
-
-        # unroll twice
-        shrl %ecx
-
-        # point arrays to end
-        leal (%esi,%ecx,8),%esi
-        leal (%edi,%ecx,4),%edi
-
-        # negative counter
-        negl %ecx
-        jmp _ConvertX86p32_16BGR555.L6
-
-_ConvertX86p32_16BGR555.L5:
-        movl %eax,-4(%edi,%ecx,4)
-_ConvertX86p32_16BGR555.L6:
-        movl 4(%esi,%ecx,8),%edx
-
-        movb 4(%esi,%ecx,8),%bh
-        movb (%esi,%ecx,8),%ah
-
-        shrb $3,%bh
-        movb 1(%esi,%ecx,8),%al
-
-        shrb $3,%ah
-        movb 5(%esi,%ecx,8),%bl
-
-        shll $2,%eax
-        movb 2(%esi,%ecx,8),%dl
-
-        shll $18,%ebx
-        andl $0x00007FE0,%eax
-
-        shrl $3,%edx
-        andl $0x07FE00000,%ebx
-
-        andl $0x001F001F,%edx
-        addl %ebx,%eax
-
-        addl %edx,%eax
-        incl %ecx
-
-        jnz _ConvertX86p32_16BGR555.L5
-
-        movl %eax,-4(%edi,%ecx,4)
-
-        # tail
-        popl %ecx
-        andl $1,%ecx
-        jz _ConvertX86p32_16BGR555.L7
-        movb (%esi),%ah   # blue
-        movb 1(%esi),%al  # green
-        movb 2(%esi),%bl  # red
-        shrb $3,%ah
-        andb $0b11111000,%al
-        shll $2,%eax
-        shrb $3,%bl
-        addb %bl,%al
-        movb %al,(%edi)
-        movb %ah,1(%edi)
-        addl $4,%esi
-        addl $2,%edi
-
-_ConvertX86p32_16BGR555.L7:
-        jmp _x86return
-
-
-
-
-
-## FROM 32 BIT RGB to 8 BIT RGB (rrrgggbbb)
-## This routine writes FOUR pixels at once (dword) and then, if they exist
-## the trailing three pixels
-_ConvertX86p32_8RGB332:
-
-
-_ConvertX86p32_8RGB332.L_ALIGNED:
-        pushl %ecx
-
-        shrl $2,%ecx            # We will draw 4 pixels at once
-        jnz _ConvertX86p32_8RGB332.L1
-
-        jmp _ConvertX86p32_8RGB332.L2 # short jump out of range :(
-
-_ConvertX86p32_8RGB332.L1:
-        movl (%esi),%eax        # first pair of pixels
-        movl 4(%esi),%edx
-
-        shrb $6,%dl
-        movl %eax,%ebx
-
-        shrb $6,%al
-        andb $0x0e0,%ah
-
-        shrl $16,%ebx
-        andb $0x0e0,%dh
-
-        shrb $3,%ah
-        andb $0x0e0,%bl
-
-        shrb $3,%dh
-
-        orb %bl,%al
-
-        movl %edx,%ebx
-        orb %ah,%al
-
-        shrl $16,%ebx
-        orb %dh,%dl
-
-        andb $0x0e0,%bl
-
-        orb %bl,%dl
-
-        movb %dl,%ah
-
-
-
-        movl 8(%esi),%ebx       # second pair of pixels
-
-        movl %ebx,%edx
-        andb $0x0e0,%bh
-
-        shrb $6,%bl
-        andl $0x0e00000,%edx
-
-        shrl $16,%edx
-
-        shrb $3,%bh
-
-        rorl $16,%eax
-        orb %dl,%bl
-
-        movl 12(%esi),%edx
-        orb %bh,%bl
-
-        movb %bl,%al
-
-        movl %edx,%ebx
-        andb $0x0e0,%dh
-
-        shrb $6,%dl
-        andl $0x0e00000,%ebx
-
-        shrb $3,%dh
-        movb %dl,%ah
-
-        shrl $16,%ebx
-        orb %dh,%ah
-
-        orb %bl,%ah
-
-        roll $16,%eax
-        addl $16,%esi
-
-        movl %eax,(%edi)
-        addl $4,%edi
-
-        decl %ecx
-        jz _ConvertX86p32_8RGB332.L2 # L1 out of range for short jump :(
-
-        jmp _ConvertX86p32_8RGB332.L1
-_ConvertX86p32_8RGB332.L2:
-
-        popl %ecx
-        andl $3,%ecx            # mask out number of pixels to draw
-
-        jz _ConvertX86p32_8RGB332.L4 # Nothing to do anymore
-
-_ConvertX86p32_8RGB332.L3:
-        movl (%esi),%eax        # single pixel conversion for trailing pixels
-
-        movl %eax,%ebx
-
-        shrb $6,%al
-        andb $0x0e0,%ah
-
-        shrl $16,%ebx
-
-        shrb $3,%ah
-        andb $0x0e0,%bl
-
-        orb %ah,%al
-        orb %bl,%al
-
-        movb %al,(%edi)
-
-        incl %edi
-        addl $4,%esi
-
-        decl %ecx
-        jnz _ConvertX86p32_8RGB332.L3
-
-_ConvertX86p32_8RGB332.L4:
-        jmp _x86return

+ 1041 - 0
packages/hermes/src/i386/x86p_32.inc

@@ -0,0 +1,1041 @@
+{
+    x86 format converters for HERMES
+    Some routines Copyright (c) 1998 Christian Nentwich ([email protected])
+    Most routines are (c) Glenn Fiedler ([email protected]), used with permission
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version
+    with the following modification:
+
+    As a special exception, the copyright holders of this library give you
+    permission to link this library with independent modules to produce an
+    executable, regardless of the license terms of these independent modules, and
+    to copy and distribute the resulting executable under terms of your choice,
+    provided that you also meet, for each linked independent module, the terms
+    and conditions of the license of that module. An independent module is a
+    module which is not derived from or based on this library. If you modify
+    this library, you may extend this exception to your version of the library,
+    but you are not obligated to do so. If you do not wish to do so, delete this
+    exception statement from your version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+}
+
+
+
+{ _Convert_*
+ Parameters:
+   ESI = source
+   EDI = dest
+   ECX = amount (never 0; the _ConvertX86 routine checks for that)
+ Destroys:
+   EAX, EBX, EDX
+}
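+
+{ Note: the cdecl declarations below are nominal; since these routines are
+  "nostackframe", no prologue is generated and each one really receives its
+  operands in the registers listed above, just as the old .as versions did. }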
+
+procedure ConvertX86p32_32BGR888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+    // check short
+    cmpl $32,%ecx
+    ja .L3
+
+.L1:  // short loop
+    movl (%esi),%edx
+    bswapl %edx
+    rorl $8,%edx
+    movl %edx,(%edi)
+    addl $4,%esi
+    addl $4,%edi
+    decl %ecx
+    jnz .L1
+.L2:
+    jmp _X86RETURN
+
+.L3:  // save ebp
+    pushl %ebp
+
+    // unroll four times
+    movl %ecx,%ebp
+    shrl $2,%ebp
+
+    // save count
+    pushl %ecx
+
+.L4:    movl (%esi),%eax
+        movl 4(%esi),%ebx
+
+        bswapl %eax
+
+        bswapl %ebx
+
+        rorl $8,%eax
+        movl 8(%esi),%ecx
+
+        rorl $8,%ebx
+        movl 12(%esi),%edx
+
+        bswapl %ecx
+
+        bswapl %edx
+
+        rorl $8,%ecx
+        movl %eax,(%edi)
+
+        rorl $8,%edx
+        movl %ebx,4(%edi)
+
+        movl %ecx,8(%edi)
+        movl %edx,12(%edi)
+
+        addl $16,%esi
+        addl $16,%edi
+
+        decl %ebp
+        jnz .L4
+
+    // check tail
+    popl %ecx
+    andl $0b11,%ecx
+    jz .L6
+
+.L5:  // tail loop
+    movl (%esi),%edx
+    bswapl %edx
+    rorl $8,%edx
+    movl %edx,(%edi)
+    addl $4,%esi
+    addl $4,%edi
+    decl %ecx
+    jnz .L5
+
+.L6: popl %ebp
+    jmp _X86RETURN
+end;
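+
+{ Worked example (illustrative value): a source dword $00112233 (A=$00, R=$11,
+  G=$22, B=$33) becomes $33221100 after bswap and $00332211 after ror 8, so
+  red and blue trade places while the top byte stays put. }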
+
+procedure ConvertX86p32_32RGBA888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+    // check short
+    cmpl $32,%ecx
+    ja .L3
+
+.L1:  // short loop
+    movl (%esi),%edx
+    roll $8,%edx
+    movl %edx,(%edi)
+    addl $4,%esi
+    addl $4,%edi
+    decl %ecx
+    jnz .L1
+.L2:
+    jmp _X86RETURN
+
+.L3:  // save ebp
+    pushl %ebp
+
+    // unroll four times
+    movl %ecx,%ebp
+    shrl $2,%ebp
+
+    // save count
+    pushl %ecx
+
+.L4:    movl (%esi),%eax
+        movl 4(%esi),%ebx
+
+        roll $8,%eax
+        movl 8(%esi),%ecx
+
+        roll $8,%ebx
+        movl 12(%esi),%edx
+
+        roll $8,%ecx
+        movl %eax,(%edi)
+
+        roll $8,%edx
+        movl %ebx,4(%edi)
+
+        movl %ecx,8(%edi)
+        movl %edx,12(%edi)
+
+        addl $16,%esi
+        addl $16,%edi
+
+        decl %ebp
+        jnz .L4
+
+    // check tail
+    popl %ecx
+    andl $0b11,%ecx
+    jz .L6
+
+.L5:  // tail loop
+    movl (%esi),%edx
+    roll $8,%edx
+    movl %edx,(%edi)
+    addl $4,%esi
+    addl $4,%edi
+    decl %ecx
+    jnz .L5
+
+.L6: popl %ebp
+    jmp _X86RETURN
+end;
+
+
+procedure ConvertX86p32_32BGRA888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+    // check short
+    cmpl $32,%ecx
+    ja .L3
+
+.L1:  // short loop
+    movl (%esi),%edx
+    bswapl %edx
+    movl %edx,(%edi)
+    addl $4,%esi
+    addl $4,%edi
+    decl %ecx
+    jnz .L1
+.L2:
+    jmp _X86RETURN
+
+.L3:  // save ebp
+    pushl %ebp
+
+    // unroll four times
+    movl %ecx,%ebp
+    shrl $2,%ebp
+
+    // save count
+    pushl %ecx
+
+.L4:    movl (%esi),%eax
+        movl 4(%esi),%ebx
+
+        movl 8(%esi),%ecx
+        movl 12(%esi),%edx
+
+        bswapl %eax
+
+        bswapl %ebx
+
+        bswapl %ecx
+
+        bswapl %edx
+
+        movl %eax,(%edi)
+        movl %ebx,4(%edi)
+
+        movl %ecx,8(%edi)
+        movl %edx,12(%edi)
+
+        addl $16,%esi
+        addl $16,%edi
+
+        decl %ebp
+        jnz .L4
+
+    // check tail
+    popl %ecx
+    andl $0b11,%ecx
+    jz .L6
+
+.L5:  // tail loop
+    movl (%esi),%edx
+    bswapl %edx
+    movl %edx,(%edi)
+    addl $4,%esi
+    addl $4,%edi
+    decl %ecx
+    jnz .L5
+
+.L6: popl %ebp
+    jmp _X86RETURN
+end;
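+
+{ The three 32-to-32 shuffles above differ only in the rotation used: BGR888
+  is bswap + ror 8 (swaps R and B), RGBA888 is rol 8 (moves alpha from the
+  top byte to the bottom), and BGRA888 is a plain bswap (reverses all four
+  bytes). }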
+
+
+
+// 32 bit RGB 888 to 24 BIT RGB 888
+procedure ConvertX86p32_24RGB888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+        // check short
+        cmpl $32,%ecx
+        ja .L3
+
+.L1:  // short loop
+        movb (%esi),%al
+        movb 1(%esi),%bl
+        movb 2(%esi),%dl
+        movb %al,(%edi)
+        movb %bl,1(%edi)
+        movb %dl,2(%edi)
+        addl $4,%esi
+        addl $3,%edi
+        decl %ecx
+        jnz .L1
+.L2:
+        jmp _X86RETURN
+
+.L3:  // head
+        movl %edi,%edx
+        andl $0b11,%edx
+        jz .L4
+        movb (%esi),%al
+        movb 1(%esi),%bl
+        movb 2(%esi),%dl
+        movb %al,(%edi)
+        movb %bl,1(%edi)
+        movb %dl,2(%edi)
+        addl $4,%esi
+        addl $3,%edi
+        decl %ecx
+        jmp .L3
+
+.L4:  // unroll 4 times
+        pushl %ebp
+        movl %ecx,%ebp
+        shrl $2,%ebp
+
+    // save count
+        pushl %ecx
+
+.L5:    movl (%esi),%eax                // first dword            eax = [A][R][G][B]
+        movl 4(%esi),%ebx               // second dword           ebx = [a][r][g][b]
+
+        shll $8,%eax                    //                        eax = [R][G][B][.]
+        movl 12(%esi),%ecx              // third dword            ecx = [a][r][g][b]
+
+        shll $8,%ebx                    //                        ebx = [r][g][b][.]
+        movb 4(%esi),%al                //                        eax = [R][G][B][b]
+
+        rorl $8,%eax                    //                        eax = [b][R][G][B] (done)
+        movb 8+1(%esi),%bh              //                        ebx = [r][g][G][.]
+
+        movl %eax,(%edi)
+        addl $3*4,%edi
+
+        shll $8,%ecx                    //                        ecx = [r][g][b][.]
+        movb 8+0(%esi),%bl              //                        ebx = [r][g][G][B]
+
+        roll $16,%ebx                   //                        ebx = [G][B][r][g] (done)
+        movb 8+2(%esi),%cl              //                        ecx = [r][g][b][R] (done)
+
+        movl %ebx,4-3*4(%edi)
+        addl $4*4,%esi
+
+        movl %ecx,8-3*4(%edi)
+        decl %ebp
+
+        jnz .L5
+
+    // check tail
+        popl %ecx
+        andl $0b11,%ecx
+        jz .L7
+
+.L6:  // tail loop
+        movb (%esi),%al
+        movb 1(%esi),%bl
+        movb 2(%esi),%dl
+        movb %al,(%edi)
+        movb %bl,1(%edi)
+        movb %dl,2(%edi)
+        addl $4,%esi
+        addl $3,%edi
+        decl %ecx
+        jnz .L6
+
+.L7:    popl %ebp
+        jmp _X86RETURN
+end;
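+
+{ The unrolled loop above packs four 4-byte source pixels (16 bytes) into
+  three destination dwords (12 bytes), which is why ESI advances by 4*4 and
+  EDI by 3*4 per iteration. }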
+
+
+
+// 32 bit RGB 888 to 24 bit BGR 888
+procedure ConvertX86p32_24BGR888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+        // check short
+        cmpl $32,%ecx
+        ja .L3
+
+
+.L1:  // short loop
+        movb (%esi),%dl
+        movb 1(%esi),%bl
+        movb 2(%esi),%al
+        movb %al,(%edi)
+        movb %bl,1(%edi)
+        movb %dl,2(%edi)
+        addl $4,%esi
+        addl $3,%edi
+        decl %ecx
+        jnz .L1
+.L2:
+        jmp _X86RETURN
+
+.L3:  // head
+        movl %edi,%edx
+        andl $0b11,%edx
+        jz .L4
+        movb (%esi),%dl
+        movb 1(%esi),%bl
+        movb 2(%esi),%al
+        movb %al,(%edi)
+        movb %bl,1(%edi)
+        movb %dl,2(%edi)
+        addl $4,%esi
+        addl $3,%edi
+        decl %ecx
+        jmp .L3
+
+.L4:  // unroll 4 times
+        pushl %ebp
+        movl %ecx,%ebp
+        shrl $2,%ebp
+
+        // save count
+        pushl %ecx
+
+.L5:
+        movl (%esi),%eax                // first dword            eax = [A][R][G][B]
+        movl 4(%esi),%ebx               // second dword           ebx = [a][r][g][b]
+
+        bswapl %eax                     //                        eax = [B][G][R][A]
+
+        bswapl %ebx                     //                        ebx = [b][g][r][a]
+
+        movb 4+2(%esi),%al              //                        eax = [B][G][R][r]
+        movb 4+4+1(%esi),%bh            //                        ebx = [b][g][G][a]
+
+        rorl $8,%eax                    //                        eax = [r][B][G][R] (done)
+        movb 4+4+2(%esi),%bl            //                        ebx = [b][g][G][R]
+
+        rorl $16,%ebx                   //                        ebx = [G][R][b][g] (done)
+        movl %eax,(%edi)
+
+        movl %ebx,4(%edi)
+        movl 12(%esi),%ecx              // third dword            ecx = [a][r][g][b]
+
+        bswapl %ecx                     //                        ecx = [b][g][r][a]
+
+        movb 8(%esi),%cl                //                        ecx = [b][g][r][B] (done)
+        addl $4*4,%esi
+
+        movl %ecx,8(%edi)
+        addl $3*4,%edi
+
+        decl %ebp
+        jnz .L5
+
+        // check tail
+        popl %ecx
+        andl $0b11,%ecx
+        jz .L7
+
+.L6:  // tail loop
+        movb (%esi),%dl
+        movb 1(%esi),%bl
+        movb 2(%esi),%al
+        movb %al,(%edi)
+        movb %bl,1(%edi)
+        movb %dl,2(%edi)
+        addl $4,%esi
+        addl $3,%edi
+        decl %ecx
+        jnz .L6
+
+.L7:
+        popl %ebp
+        jmp _X86RETURN
+end;
+
+
+
+// 32 bit RGB 888 to 16 BIT RGB 565
+procedure ConvertX86p32_16RGB565(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+        // check short
+        cmpl $16,%ecx
+        ja .L3
+
+.L1:  // short loop
+        movb (%esi),%bl   // blue
+        movb 1(%esi),%al  // green
+        movb 2(%esi),%ah  // red
+        shrb $3,%ah
+        andb $0b11111100,%al
+        shll $3,%eax
+        shrb $3,%bl
+        addb %bl,%al
+        movb %al,(%edi)
+        movb %ah,1(%edi)
+        addl $4,%esi
+        addl $2,%edi
+        decl %ecx
+        jnz .L1
+
+.L2:     // End of short loop
+        jmp _X86RETURN
+
+
+.L3:  // head
+        movl %edi,%ebx
+        andl $0b11,%ebx
+        jz .L4
+
+        movb (%esi),%bl   // blue
+        movb 1(%esi),%al  // green
+        movb 2(%esi),%ah  // red
+        shrb $3,%ah
+        andb $0b11111100,%al
+        shll $3,%eax
+        shrb $3,%bl
+        addb %bl,%al
+        movb %al,(%edi)
+        movb %ah,1(%edi)
+        addl $4,%esi
+        addl $2,%edi
+        decl %ecx
+
+.L4:
+    // save count
+        pushl %ecx
+
+    // unroll twice
+        shrl $1,%ecx
+
+    // point arrays to end
+        leal (%esi,%ecx,8),%esi
+        leal (%edi,%ecx,4),%edi
+
+    // negative counter
+        negl %ecx
+        jmp .L6
+
+.L5:
+        movl %eax,-4(%edi,%ecx,4)
+.balign 8
+.L6:
+        movl (%esi,%ecx,8),%eax
+
+        shrb $2,%ah
+        movl 4(%esi,%ecx,8),%ebx
+
+        shrl $3,%eax
+        movl 4(%esi,%ecx,8),%edx
+
+        shrb $2,%bh
+        movb 2(%esi,%ecx,8),%dl
+
+        shll $13,%ebx
+        andl $0x000007FF,%eax
+
+        shll $8,%edx
+        andl $0x07FF0000,%ebx
+
+        andl $0x0F800F800,%edx
+        addl %ebx,%eax
+
+        addl %edx,%eax
+        incl %ecx
+
+        jnz .L5
+
+        movl %eax,-4(%edi,%ecx,4)
+
+    // tail
+        popl %ecx
+        testb $1,%cl
+        jz .L7
+
+        movb (%esi),%bl   // blue
+        movb 1(%esi),%al  // green
+        movb 2(%esi),%ah  // red
+        shrb $3,%ah
+        andb $0b11111100,%al
+        shll $3,%eax
+        shrb $3,%bl
+        addb %bl,%al
+        movb %al,(%edi)
+        movb %ah,1(%edi)
+        addl $4,%esi
+        addl $2,%edi
+
+.L7:
+        jmp _X86RETURN
+end;
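+
+{ For reference, the 565 packing computed above matches this plain-Pascal
+  sketch (illustrative only, not compiled into the unit):
+
+    function Pack565(r, g, b: Byte): Word;
+    begin
+      Pack565 := ((r shr 3) shl 11) or ((g shr 2) shl 5) or (b shr 3);
+    end;
+}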
+
+
+
+// 32 bit RGB 888 to 16 BIT BGR 565
+
+procedure ConvertX86p32_16BGR565(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+        // check short
+        cmpl $16,%ecx
+        ja .L3
+
+.L1:  // short loop
+        movb (%esi),%ah   // blue
+        movb 1(%esi),%al  // green
+        movb 2(%esi),%bl  // red
+        shrb $3,%ah
+        andb $0b11111100,%al
+        shll $3,%eax
+        shrb $3,%bl
+        addb %bl,%al
+        movb %al,(%edi)
+        movb %ah,1(%edi)
+        addl $4,%esi
+        addl $2,%edi
+        decl %ecx
+        jnz .L1
+.L2:
+        jmp _X86RETURN
+
+.L3:  // head
+        movl %edi,%ebx
+        andl $0b11,%ebx
+        jz .L4
+        movb (%esi),%ah   // blue
+        movb 1(%esi),%al  // green
+        movb 2(%esi),%bl  // red
+        shrb $3,%ah
+        andb $0b11111100,%al
+        shll $3,%eax
+        shrb $3,%bl
+        addb %bl,%al
+        movb %al,(%edi)
+        movb %ah,1(%edi)
+        addl $4,%esi
+        addl $2,%edi
+        decl %ecx
+
+.L4:  // save count
+        pushl %ecx
+
+        // unroll twice
+        shrl $1,%ecx
+
+        // point arrays to end
+        leal (%esi,%ecx,8),%esi
+        leal (%edi,%ecx,4),%edi
+
+        // negative count
+        negl %ecx
+        jmp .L6
+
+.L5:
+        movl %eax,-4(%edi,%ecx,4)
+.L6:
+        movl 4(%esi,%ecx,8),%edx
+
+        movb 4(%esi,%ecx,8),%bh
+        movb (%esi,%ecx,8),%ah
+
+        shrb $3,%bh
+        movb 1(%esi,%ecx,8),%al
+
+        shrb $3,%ah
+        movb 5(%esi,%ecx,8),%bl
+
+        shll $3,%eax
+        movb 2(%esi,%ecx,8),%dl
+
+        shll $19,%ebx
+        andl $0x0000FFE0,%eax
+
+        shrl $3,%edx
+        andl $0x0FFE00000,%ebx
+
+        andl $0x001F001F,%edx
+        addl %ebx,%eax
+
+        addl %edx,%eax
+        incl %ecx
+
+        jnz .L5
+
+        movl %eax,-4(%edi,%ecx,4)
+
+        // tail
+        popl %ecx
+        andl $1,%ecx
+        jz .L7
+        movb (%esi),%ah   // blue
+        movb 1(%esi),%al  // green
+        movb 2(%esi),%bl  // red
+        shrb $3,%ah
+        andb $0b11111100,%al
+        shll $3,%eax
+        shrb $3,%bl
+        addb %bl,%al
+        movb %al,(%edi)
+        movb %ah,1(%edi)
+        addl $4,%esi
+        addl $2,%edi
+
+.L7:
+        jmp _X86RETURN
+end;
+
+
+
+// 32 BIT RGB TO 16 BIT RGB 555
+procedure ConvertX86p32_16RGB555(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+        // check short
+        cmpl $16,%ecx
+        ja .L3
+
+.L1:  // short loop
+        movb (%esi),%bl   // blue
+        movb 1(%esi),%al  // green
+        movb 2(%esi),%ah  // red
+        shrb $3,%ah
+        andb $0b11111000,%al
+        shll $2,%eax
+        shrb $3,%bl
+        addb %bl,%al
+        movb %al,(%edi)
+        movb %ah,1(%edi)
+        addl $4,%esi
+        addl $2,%edi
+        decl %ecx
+        jnz .L1
+.L2:
+        jmp _X86RETURN
+
+.L3:  // head
+        movl %edi,%ebx
+        andl $0b11,%ebx
+        jz .L4
+        movb (%esi),%bl   // blue
+        movb 1(%esi),%al  // green
+        movb 2(%esi),%ah  // red
+        shrb $3,%ah
+        andb $0b11111000,%al
+        shll $2,%eax
+        shrb $3,%bl
+        addb %bl,%al
+        movb %al,(%edi)
+        movb %ah,1(%edi)
+        addl $4,%esi
+        addl $2,%edi
+        decl %ecx
+
+.L4:  // save count
+        pushl %ecx
+
+        // unroll twice
+        shrl $1,%ecx
+
+        // point arrays to end
+        leal (%esi,%ecx,8),%esi
+        leal (%edi,%ecx,4),%edi
+
+        // negative counter
+        negl %ecx
+        jmp .L6
+
+.L5:
+        movl %eax,-4(%edi,%ecx,4)
+.L6:
+        movl (%esi,%ecx,8),%eax
+
+        shrb $3,%ah
+        movl 4(%esi,%ecx,8),%ebx
+
+        shrl $3,%eax
+        movl 4(%esi,%ecx,8),%edx
+
+        shrb $3,%bh
+        movb 2(%esi,%ecx,8),%dl
+
+        shll $13,%ebx
+        andl $0x000007FF,%eax
+
+        shll $7,%edx
+        andl $0x07FF0000,%ebx
+
+        andl $0x07C007C00,%edx
+        addl %ebx,%eax
+
+        addl %edx,%eax
+        incl %ecx
+
+        jnz .L5
+
+        movl %eax,-4(%edi,%ecx,4)
+
+        // tail
+        popl %ecx
+        andl $1,%ecx
+        jz .L7
+        movb (%esi),%bl   // blue
+        movb 1(%esi),%al  // green
+        movb 2(%esi),%ah  // red
+        shrb $3,%ah
+        andb $0b11111000,%al
+        shll $2,%eax
+        shrb $3,%bl
+        addb %bl,%al
+        movb %al,(%edi)
+        movb %ah,1(%edi)
+        addl $4,%esi
+        addl $2,%edi
+
+.L7:
+        jmp _X86RETURN
+end;
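+
+{ The 555 packing above is ((r shr 3) shl 10) or ((g shr 3) shl 5) or
+  (b shr 3); only the green width (5 bits instead of 6) differs from the 565
+  routine. }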
+
+
+
+// 32 BIT RGB TO 16 BIT BGR 555
+procedure ConvertX86p32_16BGR555(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+        // check short
+        cmpl $16,%ecx
+        ja .L3
+
+
+.L1:  // short loop
+        movb (%esi),%ah   // blue
+        movb 1(%esi),%al  // green
+        movb 2(%esi),%bl  // red
+        shrb $3,%ah
+        andb $0b11111000,%al
+        shll $2,%eax
+        shrb $3,%bl
+        addb %bl,%al
+        movb %al,(%edi)
+        movb %ah,1(%edi)
+        addl $4,%esi
+        addl $2,%edi
+        decl %ecx
+        jnz .L1
+.L2:
+        jmp _X86RETURN
+
+.L3:  // head
+        movl %edi,%ebx
+        andl $0b11,%ebx
+        jz .L4
+        movb (%esi),%ah   // blue
+        movb 1(%esi),%al  // green
+        movb 2(%esi),%bl  // red
+        shrb $3,%ah
+        andb $0b11111000,%al
+        shll $2,%eax
+        shrb $3,%bl
+        addb %bl,%al
+        movb %al,(%edi)
+        movb %ah,1(%edi)
+        addl $4,%esi
+        addl $2,%edi
+        decl %ecx
+
+.L4:  // save count
+        pushl %ecx
+
+        // unroll twice
+        shrl $1,%ecx
+
+        // point arrays to end
+        leal (%esi,%ecx,8),%esi
+        leal (%edi,%ecx,4),%edi
+
+        // negative counter
+        negl %ecx
+        jmp .L6
+
+.L5:
+        movl %eax,-4(%edi,%ecx,4)
+.L6:
+        movl 4(%esi,%ecx,8),%edx
+
+        movb 4(%esi,%ecx,8),%bh
+        movb (%esi,%ecx,8),%ah
+
+        shrb $3,%bh
+        movb 1(%esi,%ecx,8),%al
+
+        shrb $3,%ah
+        movb 5(%esi,%ecx,8),%bl
+
+        shll $2,%eax
+        movb 2(%esi,%ecx,8),%dl
+
+        shll $18,%ebx
+        andl $0x00007FE0,%eax
+
+        shrl $3,%edx
+        andl $0x07FE00000,%ebx
+
+        andl $0x001F001F,%edx
+        addl %ebx,%eax
+
+        addl %edx,%eax
+        incl %ecx
+
+        jnz .L5
+
+        movl %eax,-4(%edi,%ecx,4)
+
+        // tail
+        popl %ecx
+        andl $1,%ecx
+        jz .L7
+        movb (%esi),%ah   // blue
+        movb 1(%esi),%al  // green
+        movb 2(%esi),%bl  // red
+        shrb $3,%ah
+        andb $0b11111000,%al
+        shll $2,%eax
+        shrb $3,%bl
+        addb %bl,%al
+        movb %al,(%edi)
+        movb %ah,1(%edi)
+        addl $4,%esi
+        addl $2,%edi
+
+.L7:
+        jmp _X86RETURN
+end;
+
+
+
+
+// FROM 32 BIT RGB to 8 BIT RGB (rrrgggbb)
+// This routine writes FOUR pixels at once (one dword) and then, if any
+// remain, up to three trailing pixels one by one
+procedure ConvertX86p32_8RGB332(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+.L_ALIGNED:
+        pushl %ecx
+
+        shrl $2,%ecx            // We will draw 4 pixels at once
+        jnz .L1
+
+        jmp .L2 // short jump out of range :(
+
+.L1:
+        movl (%esi),%eax        // first pair of pixels
+        movl 4(%esi),%edx
+
+        shrb $6,%dl
+        movl %eax,%ebx
+
+        shrb $6,%al
+        andb $0x0e0,%ah
+
+        shrl $16,%ebx
+        andb $0x0e0,%dh
+
+        shrb $3,%ah
+        andb $0x0e0,%bl
+
+        shrb $3,%dh
+
+        orb %bl,%al
+
+        movl %edx,%ebx
+        orb %ah,%al
+
+        shrl $16,%ebx
+        orb %dh,%dl
+
+        andb $0x0e0,%bl
+
+        orb %bl,%dl
+
+        movb %dl,%ah
+
+
+
+        movl 8(%esi),%ebx       // second pair of pixels
+
+        movl %ebx,%edx
+        andb $0x0e0,%bh
+
+        shrb $6,%bl
+        andl $0x0e00000,%edx
+
+        shrl $16,%edx
+
+        shrb $3,%bh
+
+        rorl $16,%eax
+        orb %dl,%bl
+
+        movl 12(%esi),%edx
+        orb %bh,%bl
+
+        movb %bl,%al
+
+        movl %edx,%ebx
+        andb $0x0e0,%dh
+
+        shrb $6,%dl
+        andl $0x0e00000,%ebx
+
+        shrb $3,%dh
+        movb %dl,%ah
+
+        shrl $16,%ebx
+        orb %dh,%ah
+
+        orb %bl,%ah
+
+        roll $16,%eax
+        addl $16,%esi
+
+        movl %eax,(%edi)
+        addl $4,%edi
+
+        decl %ecx
+        jz .L2 // L1 out of range for short jump :(
+
+        jmp .L1
+.L2:
+
+        popl %ecx
+        andl $3,%ecx            // mask out number of pixels to draw
+
+        jz .L4 // Nothing to do anymore
+
+.L3:
+        movl (%esi),%eax        // single pixel conversion for trailing pixels
+
+        movl %eax,%ebx
+
+        shrb $6,%al
+        andb $0x0e0,%ah
+
+        shrl $16,%ebx
+
+        shrb $3,%ah
+        andb $0x0e0,%bl
+
+        orb %ah,%al
+        orb %bl,%al
+
+        movb %al,(%edi)
+
+        incl %edi
+        addl $4,%esi
+
+        decl %ecx
+        jnz .L3
+
+.L4:
+        jmp _X86RETURN
+end;
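+
+{ Per-pixel equivalent of the packing above, as a plain-Pascal sketch:
+    rgb332 := (r and $E0) or ((g and $E0) shr 3) or (b shr 6);
+  i.e. three bits of red, three of green and two of blue: rrrgggbb. }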

+ 0 - 120
packages/hermes/src/i386/x86p_cpy.as

@@ -1,120 +0,0 @@
-
-
-
-
-
-.globl _CopyX86p_4byte
-.globl _CopyX86p_3byte
-.globl _CopyX86p_2byte
-.globl _CopyX86p_1byte
-
-.extern _x86return
-
-
-.text
-
-## _Copy*
-## Paramters:
-##   ESI = source
-##   EDI = dest
-##   ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though))
-## Destroys:
-##   EAX, EBX, EDX
-
-_CopyX86p_4byte:
-
-        rep
- movsl
-
-        jmp _x86return
-
-
-_CopyX86p_3byte:
-
-        leal (%ecx,%ecx,2),%ecx
-        jmp _CopyX86p_1byte
-
-
-
-_CopyX86p_2byte:
-
-        testl $3,%edi                   # Check if video memory is aligned
-        jz _CopyX86p_2byte.L_ALIGNED
-
-        movw (%esi),%ax
-        addl $2,%esi
-
-        movw %ax,(%edi)
-        addl $2,%edi
-
-        decl %ecx
-        jz _CopyX86p_2byte.L3
-
-_CopyX86p_2byte.L_ALIGNED:
-
-        movl %ecx,%ebx                  # Save ecx for later
-
-        shrl %ecx
-        jz _CopyX86p_2byte.L2
-
-        rep
- movsl
-
-_CopyX86p_2byte.L2:
-        andl $1,%ebx
-        jz _CopyX86p_2byte.L3
-
-        movw (%esi),%ax
-        addl $2,%esi
-
-        movw %ax,(%edi)
-        addl $2,%edi
-
-_CopyX86p_2byte.L3:
-        jmp _x86return
-
-
-
-_CopyX86p_1byte:
-
-_CopyX86p_1byte.L_alignloop:
-        testl $3,%edi
-        jz _CopyX86p_1byte.L_aligned
-
-        movb (%esi),%al
-        incl %esi
-
-        movb %al,(%edi)
-        incl %edi
-
-        decl %ecx
-        jz _CopyX86p_1byte.L4
-        jmp _CopyX86p_1byte.L_alignloop
-
-_CopyX86p_1byte.L_aligned:
-        movl %ecx,%edx
-
-        shrl $2,%ecx
-        jz _CopyX86p_1byte.L2
-
-        rep
- movsl
-
-_CopyX86p_1byte.L2:
-        movl %edx,%ecx          # Get the remaining pixels to draw
-
-        andl $3,%ecx
-        jz _CopyX86p_1byte.L4   # width was modulo 4
-
-_CopyX86p_1byte.L3:
-        movb (%esi),%al
-        incl %esi
-
-        movb %al,(%edi)
-        incl %edi
-
-        decl %ecx
-        jnz _CopyX86p_1byte.L3
-
-_CopyX86p_1byte.L4:
-        jmp _x86return

+ 148 - 0
packages/hermes/src/i386/x86p_cpy.inc

@@ -0,0 +1,148 @@
+{
+    x86 format converters for HERMES
+    Some routines Copyright (c) 1998 Christian Nentwich ([email protected])
+    Some routines are (c) Glenn Fiedler ([email protected]), used with permission
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version
+    with the following modification:
+
+    As a special exception, the copyright holders of this library give you
+    permission to link this library with independent modules to produce an
+    executable, regardless of the license terms of these independent modules, and
+    to copy and distribute the resulting executable under terms of your choice,
+    provided that you also meet, for each linked independent module, the terms
+    and conditions of the license of that module. An independent module is a
+    module which is not derived from or based on this library. If you modify
+    this library, you may extend this exception to your version of the library,
+    but you are not obligated to do so. If you do not wish to do so, delete this
+    exception statement from your version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+}
+
+
+
+
+
+{ _Copy*
+ Parameters:
+   ESI = source
+   EDI = dest
+   ECX = amount (never 0; the _ConvertX86 routine checks for that)
+ Destroys:
+   EAX, EBX, EDX
+}
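+
+{ CopyX86p_4byte is a bare "rep movsl"; CopyX86p_3byte rescales ECX to a byte
+  count and jumps to CopyX86p_1byte. The 2- and 1-byte copiers handle
+  unaligned leading elements first, move the bulk with "rep movsl", then
+  finish any remainder element by element. }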
+
+procedure CopyX86p_4byte(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+
+        rep
+ movsl
+
+        jmp _X86RETURN
+end;
+
+
+procedure CopyX86p_1byte(CONVERT_PARAMETERS); cdecl; nostackframe; forward;
+
+procedure CopyX86p_3byte(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+        leal (%ecx,%ecx,2),%ecx
+        jmp CopyX86p_1byte
+end;
+
+
+
+procedure CopyX86p_2byte(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+        testl $3,%edi                   // Check if video memory is aligned
+        jz .L_ALIGNED
+
+        movw (%esi),%ax
+        addl $2,%esi
+
+        movw %ax,(%edi)
+        addl $2,%edi
+
+        decl %ecx
+        jz .L3
+
+.L_ALIGNED:
+
+        movl %ecx,%ebx                  // Save ecx for later
+
+        shrl $1,%ecx
+        jz .L2
+
+        rep
+ movsl
+
+.L2:
+        andl $1,%ebx
+        jz .L3
+
+        movw (%esi),%ax
+        addl $2,%esi
+
+        movw %ax,(%edi)
+        addl $2,%edi
+
+.L3:
+        jmp _X86RETURN
+end;
+
+
+procedure CopyX86p_1byte(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+.L_alignloop:
+        testl $3,%edi
+        jz .L_aligned
+
+        movb (%esi),%al
+        incl %esi
+
+        movb %al,(%edi)
+        incl %edi
+
+        decl %ecx
+        jz .L4
+        jmp .L_alignloop
+
+.L_aligned:
+        movl %ecx,%edx
+
+        shrl $2,%ecx
+        jz .L2
+
+        rep
+ movsl
+
+.L2:
+        movl %edx,%ecx          // Get the remaining pixels to draw
+
+        andl $3,%ecx
+        jz .L4                  // count was a multiple of 4
+
+.L3:
+        movb (%esi),%al
+        incl %esi
+
+        movb %al,(%edi)
+        incl %edi
+
+        decl %ecx
+        jnz .L3
+
+.L4:
+        jmp _X86RETURN
+end;

+ 0 - 224
packages/hermes/src/i386/x86p_i8.as

@@ -1,224 +0,0 @@
-#
-# x86 format converters for HERMES
-# Copyright (c) 1998 Christian Nentwich ([email protected])
-# This source code is licensed under the GNU LGPL
-#
-# Please refer to the file COPYING.LIB contained in the distribution for
-# licensing conditions
-#
-# Some routines are (c) Glenn Fiedler ([email protected]), used with permission
-#
-
-
-
-.globl _ConvertX86pI8_32
-.globl _ConvertX86pI8_24
-.globl _ConvertX86pI8_16
-
-.extern _ConvertX86
-.extern _x86return
-
-.text
-
-
-## Convert_*
-## Paramters:
-##   ESI = source
-##   EDI = dest
-##   ECX = amount (NOT 0!!! (the ConvertX86 routine checks for that though))
-## Destroys:
-##   EAX, EBX, EDX
-
-_ConvertX86pI8_32:
-
-        xorl %ebx,%ebx
-        movl 36(%ebp),%edx
-_ConvertX86pI8_32.L1:
-        movb (%esi),%bl
-        incl %esi
-
-        movl (%edx,%ebx,4),%eax
-
-        movl %eax,(%edi)
-        addl $4,%edi
-
-        decl %ecx
-        jnz _ConvertX86pI8_32.L1
-
-        jmp _x86return
-
-
-
-_ConvertX86pI8_24:
-        movl 36(%ebp),%ebx
-
-        xorl %edx,%edx
-
-        # check short
-        cmpl $32,%ecx
-        ja _ConvertX86pI8_24.L3
-
-
-_ConvertX86pI8_24.L1:  # short loop
-        movb (%esi),%dl
-        movl (%ebx,%edx,4),%eax
-        movb %al,(%edi) # blue
-        movb %ah,1(%edi)# green
-        shrl $16,%eax
-        movb %al,2(%edi)# red
-        incl %esi
-        addl $3,%edi
-        decl %ecx
-        jnz _ConvertX86pI8_24.L1
-_ConvertX86pI8_24.L2:
-        jmp _x86return
-
-_ConvertX86pI8_24.L3:  # head
-        movl %edi,%eax
-        andl $0b11,%eax
-        jz _ConvertX86pI8_24.L4
-        movb (%esi),%dl
-        movl (%ebx,%edx,4),%eax
-        movb %al,(%edi) # blue
-        movb %ah,1(%edi)# green
-        shrl $16,%eax
-        movb %al,2(%edi)# red
-        incl %esi
-        addl $3,%edi
-        decl %ecx
-        jmp _ConvertX86pI8_24.L3
-
-_ConvertX86pI8_24.L4:  # save ebp
-        pushl %ebp
-        movl %ebx,%ebp
-
-        # save count
-        pushl %ecx
-
-        # unroll 4 times
-        shrl $2,%ecx
-
-_ConvertX86pI8_24.L5: pushl %ecx        # save ecx
-        movb (%esi),%dl                 # index to "A"
-
-        movl (%ebp,%edx,4),%eax         # eax = [xx][A2][A1][A0]
-        shll $8,%eax                    # eax = [A2][A1][A0][xx]
-
-        movb 1(%esi),%dl                # index to "B"
-
-        movb (%ebp,%edx,4),%al          # eax = [A2][A1][A0][B0]
-        rorl $8,%eax                    # eax = [B0][A2][A1][A0] (done)
-        movl %eax,(%edi)
-
-        movl (%ebp,%edx,4),%eax         # eax = [xx][B2][B1][B0]
-        shll $8,%eax                    # eax = [B2][B1][B0][xx]
-
-        movb 3(%esi),%dl                # index to "D"
-
-        movl (%ebp,%edx,4),%ecx         # ecx = [xx][D2][D1][D0]
-        shll $8,%ecx                    # ecx = [D2][D1][D0][xx]
-
-        movb 2(%esi),%dl                # index to "C"
-
-        movb 1(%ebp,%edx,4),%ah         # eax = [B2][B1][C1][xx]
-        movb (%ebp,%edx,4),%al          # eax = [B2][B1][C1][C0]
-        rorl $16,%eax                   # eax = [C1][C0][B2][B1] (done)
-
-        movb 2(%ebp,%edx,4),%cl         # ecx = [D2][D1][D0][C2] (done)
-
-        movl %eax,4(%edi)
-        movl %ecx,8(%edi)
-
-        addl $4,%esi
-        addl $3*4,%edi
-
-        popl %ecx                       # restore ecx
-
-        decl %ecx
-        jnz _ConvertX86pI8_24.L5
-
-        # tail
-        popl %ecx
-        andl $0b11,%ecx
-        jz _ConvertX86pI8_24.L7
-
-_ConvertX86pI8_24.L6:
-        movb (%esi),%dl
-        movl (%ebx,%edx,4),%eax
-        movb %al,(%edi) # blue
-        movb %ah,1(%edi)# green
-        shrl $16,%eax
-        movb %al,2(%edi)# red
-        incl %esi
-        addl $3,%edi
-        decl %ecx
-        jnz _ConvertX86pI8_24.L6
-
-_ConvertX86pI8_24.L7: popl %ebp
-        jmp _x86return
-
-
-.align 8
-_ConvertX86pI8_16:
-        xorl %ebx,%ebx
-        movl 36(%ebp),%edx
-
-        testl $3,%edi
-        jz _ConvertX86pI8_16.Laligned
-
-        movb (%esi),%bl
-
-        movl (%edx,%ebx,4),%eax
-        incl %esi
-
-        movw %ax,(%edi)
-        addl $2,%edi
-
-        decl %ecx
-        jz _ConvertX86pI8_16.out
-
-_ConvertX86pI8_16.Laligned:
-        pushl %ecx
-
-        xorl %eax,%eax
-        xorl %ebx,%ebx
-
-        shrl %ecx
-        jz _ConvertX86pI8_16.last_pixel
-.align 8
-_ConvertX86pI8_16.Ly:
-        movb 1(%esi),%bl
-        movb (%esi),%al
-
-        movl (%edx,%ebx,4),%ebx
-        addl $2,%esi
-
-        shll $16,%ebx
-        movl (%edx,%eax,4),%eax
-
-        orl %ebx,%eax
-        xorl %ebx,%ebx
-
-        movl %eax,(%edi)
-        addl $4,%edi
-
-        xorl %eax,%eax
-        decl %ecx
-        jnz _ConvertX86pI8_16.Ly
-
-_ConvertX86pI8_16.last_pixel:
-        popl %ecx
-
-        testb $1,%cl
-        jz _ConvertX86pI8_16.out
-
-        movb (%esi),%bl
-
-        movl (%edx,%ebx,4),%eax
-        incl %esi
-
-        movw %ax,(%edi)
-        addl $2,%edi
-
-_ConvertX86pI8_16.out:
-        jmp _x86return

+ 238 - 0
packages/hermes/src/i386/x86p_i8.inc

@@ -0,0 +1,238 @@
+{
+    x86 format converters for HERMES
+    Copyright (c) 1998 Christian Nentwich ([email protected])
+    Some routines are (c) Glenn Fiedler ([email protected]), used with permission
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version
+    with the following modification:
+
+    As a special exception, the copyright holders of this library give you
+    permission to link this library with independent modules to produce an
+    executable, regardless of the license terms of these independent modules, and
+    to copy and distribute the resulting executable under terms of your choice,
+    provided that you also meet, for each linked independent module, the terms
+    and conditions of the license of that module. An independent module is a
+    module which is not derived from or based on this library. If you modify
+    this library, you may extend this exception to your version of the library,
+    but you are not obligated to do so. If you do not wish to do so, delete this
+    exception statement from your version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+}
+
+
+
+{ Convert_*
+ Parameters:
+   ESI = source
+   EDI = dest
+   ECX = amount (never 0; the ConvertX86 routine checks for that)
+ Destroys:
+   EAX, EBX, EDX
+}
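+
+{ Besides the registers above, the I8 converters read the 8-bit-to-pixel
+  lookup table from 36(%ebp); because they are "nostackframe", EBP still
+  points at the caller's frame, where that pointer apparently lives. }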
+
+procedure ConvertX86pI8_32(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+        xorl %ebx,%ebx
+        movl 36(%ebp),%edx
+.L1:
+        movb (%esi),%bl
+        incl %esi
+
+        movl (%edx,%ebx,4),%eax
+
+        movl %eax,(%edi)
+        addl $4,%edi
+
+        decl %ecx
+        jnz .L1
+
+        jmp _X86RETURN
+end;
+
+
+procedure ConvertX86pI8_24(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+        movl 36(%ebp),%ebx
+
+        xorl %edx,%edx
+
+        // check short
+        cmpl $32,%ecx
+        ja .L3
+
+
+.L1:  // short loop
+        movb (%esi),%dl
+        movl (%ebx,%edx,4),%eax
+        movb %al,(%edi)  // blue
+        movb %ah,1(%edi) // green
+        shrl $16,%eax
+        movb %al,2(%edi) // red
+        incl %esi
+        addl $3,%edi
+        decl %ecx
+        jnz .L1
+.L2:
+        jmp _X86RETURN
+
+.L3:  // head
+        movl %edi,%eax
+        andl $0b11,%eax
+        jz .L4
+        movb (%esi),%dl
+        movl (%ebx,%edx,4),%eax
+        movb %al,(%edi)  // blue
+        movb %ah,1(%edi) // green
+        shrl $16,%eax
+        movb %al,2(%edi) // red
+        incl %esi
+        addl $3,%edi
+        decl %ecx
+        jmp .L3
+
+.L4:    // save ebp
+        pushl %ebp
+        movl %ebx,%ebp
+
+        // save count
+        pushl %ecx
+
+        // unroll 4 times
+        shrl $2,%ecx
+
+.L5:    pushl %ecx                      // save ecx
+        movb (%esi),%dl                 // index to "A"
+
+        movl (%ebp,%edx,4),%eax         // eax = [xx][A2][A1][A0]
+        shll $8,%eax                    // eax = [A2][A1][A0][xx]
+
+        movb 1(%esi),%dl                // index to "B"
+
+        movb (%ebp,%edx,4),%al          // eax = [A2][A1][A0][B0]
+        rorl $8,%eax                    // eax = [B0][A2][A1][A0] (done)
+        movl %eax,(%edi)
+
+        movl (%ebp,%edx,4),%eax         // eax = [xx][B2][B1][B0]
+        shll $8,%eax                    // eax = [B2][B1][B0][xx]
+
+        movb 3(%esi),%dl                // index to "D"
+
+        movl (%ebp,%edx,4),%ecx         // ecx = [xx][D2][D1][D0]
+        shll $8,%ecx                    // ecx = [D2][D1][D0][xx]
+
+        movb 2(%esi),%dl                // index to "C"
+
+        movb 1(%ebp,%edx,4),%ah         // eax = [B2][B1][C1][xx]
+        movb (%ebp,%edx,4),%al          // eax = [B2][B1][C1][C0]
+        rorl $16,%eax                   // eax = [C1][C0][B2][B1] (done)
+
+        movb 2(%ebp,%edx,4),%cl         // ecx = [D2][D1][D0][C2] (done)
+
+        movl %eax,4(%edi)
+        movl %ecx,8(%edi)
+
+        addl $4,%esi
+        addl $3*4,%edi
+
+        popl %ecx                       // restore ecx
+
+        decl %ecx
+        jnz .L5
+
+        // tail
+        popl %ecx
+        andl $0b11,%ecx
+        jz .L7
+
+.L6:
+        movb (%esi),%dl
+        movl (%ebx,%edx,4),%eax
+        movb %al,(%edi)  // blue
+        movb %ah,1(%edi) // green
+        shrl $16,%eax
+        movb %al,2(%edi) // red
+        incl %esi
+        addl $3,%edi
+        decl %ecx
+        jnz .L6
+
+.L7:    popl %ebp
+        jmp _X86RETURN
+end;
+
+procedure ConvertX86pI8_16(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+        xorl %ebx,%ebx
+        movl 36(%ebp),%edx
+
+        testl $3,%edi
+        jz .Laligned
+
+        movb (%esi),%bl
+
+        movl (%edx,%ebx,4),%eax
+        incl %esi
+
+        movw %ax,(%edi)
+        addl $2,%edi
+
+        decl %ecx
+        jz .LOut
+
+.Laligned:
+        pushl %ecx
+
+        xorl %eax,%eax
+        xorl %ebx,%ebx
+
+        shrl $1,%ecx
+        jz .Last_pixel
+.balign 8
+.Ly:
+        movb 1(%esi),%bl
+        movb (%esi),%al
+
+        movl (%edx,%ebx,4),%ebx
+        addl $2,%esi
+
+        shll $16,%ebx
+        movl (%edx,%eax,4),%eax
+
+        orl %ebx,%eax
+        xorl %ebx,%ebx
+
+        movl %eax,(%edi)
+        addl $4,%edi
+
+        xorl %eax,%eax
+        decl %ecx
+        jnz .Ly
+
+.Last_pixel:
+        popl %ecx
+
+        testb $1,%cl
+        jz .LOut
+
+        movb (%esi),%bl
+
+        movl (%edx,%ebx,4),%eax
+        incl %esi
+
+        movw %ax,(%edi)
+        addl $2,%edi
+
+.LOut:
+        jmp _X86RETURN
+end;

+ 0 - 114
packages/hermes/src/i386/x86p_s32.as

@@ -1,114 +0,0 @@
-
-
-
-.globl _ConvertX86p32_16RGB565_S
-
-.text
-
-.extern _x86return_S
-
-## _Convert*_S
-## Paramters:
-##   ESI = source
-##   EDI = dest
-##   ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though))
-##   EDX = x increment
-## Destroys:
-##   EAX, EBX, ECX, EDX
-
-
-_ConvertX86p32_16RGB565_S:
-
-        pushl %ebp
-        pushl %edx              # increment now at [esp+4]!
-
-        movl $0,%ebp
-
-        pushl %ecx
-        shrl %ecx
-        jnz _ConvertX86p32_16RGB565_S.L_ok
-        jmp _ConvertX86p32_16RGB565_S.L_final
-
-_ConvertX86p32_16RGB565_S.L_ok:
-
-.align 8
-_ConvertX86p32_16RGB565_S.Lx:
-        movl %ebp,%eax
-
-        shrl $14,%eax
-        addl 4(%esp),%ebp
-
-        movl %ebp,%edx
-        andl $0x0fffffffc,%eax
-
-        shrl $14,%edx
-        movl (%esi,%eax,),%ebx  # ebx = pixel one
-
-        andl $0x0fffffffc,%edx
-        andl $0x0f8fcf8,%ebx
-
-        shrb $2,%bh
-        movl (%esi,%edx,),%eax  # eax = pixel two
-
-        andl $0x0f8fcf8,%eax
-        addl 4(%esp),%ebp
-
-        shrb $2,%ah             # eax & ebx= rrrrr000|00gggggg|bbbbb000
-
-        movl %eax,%edx
-
-        shrw $3,%dx
-        andl $0x0f80000,%eax
-
-        shrl $8,%eax
-
-        orl %edx,%eax
-
-        shll $16,%eax
-        movl %ebx,%edx
-
-        shrw $3,%dx
-        andl $0x0f80000,%ebx
-
-        shrl $8,%ebx
-        orb %dh,%ah
-
-        orb %bh,%ah
-        orb %dl,%al
-
-        movl %eax,(%edi)
-        addl $4,%edi
-
-        decl %ecx
-        jnz _ConvertX86p32_16RGB565_S.Lx
-
-_ConvertX86p32_16RGB565_S.L_final:
-        popl %ecx
-        andl $1,%ecx
-        jz _ConvertX86p32_16RGB565_S.L_out
-
-        shrl $14,%ebp           # trailing pixel
-
-        andl $0x0fffffffc,%ebp
-
-        movl (%esi,%ebp,),%eax
-        movl (%esi,%ebp,),%ebx
-
-        shrl $8,%ebx
-        andl $0x0fcf8,%eax
-
-        shrb $2,%ah
-        andl $0x0f800,%ebx
-
-        shrl $3,%eax
-
-        orl %ebx,%eax
-
-        movw %ax,(%edi)
-        addl $2,%edi
-
-_ConvertX86p32_16RGB565_S.L_out:
-
-        popl %edx
-        popl %ebp
-        jmp _x86return_S

+ 140 - 0
packages/hermes/src/i386/x86p_s32.inc

@@ -0,0 +1,140 @@
+{
+    x86 format converters for HERMES
+    Some routines Copyright (c) 1998 Christian Nentwich ([email protected])
+    Some routines are (c) Glenn Fiedler ([email protected]), used with permission
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version
+    with the following modification:
+
+    As a special exception, the copyright holders of this library give you
+    permission to link this library with independent modules to produce an
+    executable, regardless of the license terms of these independent modules, and
+    to copy and distribute the resulting executable under terms of your choice,
+    provided that you also meet, for each linked independent module, the terms
+    and conditions of the license of that module. An independent module is a
+    module which is not derived from or based on this library. If you modify
+    this library, you may extend this exception to your version of the library,
+    but you are not obligated to do so. If you do not wish to do so, delete this
+    exception statement from your version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+}
+
+
+
+{ _Convert*_S
+ Parameters:
+   ESI = source
+   EDI = dest
+   ECX = amount (never 0; the _ConvertX86 routine checks for that)
+   EDX = x increment
+ Destroys:
+   EAX, EBX, ECX, EDX
+}
+
+procedure ConvertX86p32_16RGB565_S(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+        pushl %ebp
+        pushl %edx              // increment now at [esp+4]!
+
+        movl $0,%ebp
+
+        pushl %ecx
+        shrl $1,%ecx
+        jnz .L_ok
+        jmp .L_final
+
+.L_ok:
+
+.balign 8
+.Lx:
+        movl %ebp,%eax
+
+        shrl $14,%eax
+        addl 4(%esp),%ebp
+
+        movl %ebp,%edx
+        andl $0x0fffffffc,%eax
+
+        shrl $14,%edx
+        movl (%esi,%eax),%ebx  // ebx = pixel one
+
+        andl $0x0fffffffc,%edx
+        andl $0x0f8fcf8,%ebx
+
+        shrb $2,%bh
+        movl (%esi,%edx),%eax  // eax = pixel two
+
+        andl $0x0f8fcf8,%eax
+        addl 4(%esp),%ebp
+
+        shrb $2,%ah             // eax & ebx= rrrrr000|00gggggg|bbbbb000
+
+        movl %eax,%edx
+
+        shrw $3,%dx
+        andl $0x0f80000,%eax
+
+        shrl $8,%eax
+
+        orl %edx,%eax
+
+        shll $16,%eax
+        movl %ebx,%edx
+
+        shrw $3,%dx
+        andl $0x0f80000,%ebx
+
+        shrl $8,%ebx
+        orb %dh,%ah
+
+        orb %bh,%ah
+        orb %dl,%al
+
+        movl %eax,(%edi)
+        addl $4,%edi
+
+        decl %ecx
+        jnz .Lx
+
+.L_final:
+        popl %ecx
+        andl $1,%ecx
+        jz .L_out
+
+        shrl $14,%ebp           // trailing pixel
+
+        andl $0x0fffffffc,%ebp
+
+        movl (%esi,%ebp),%eax
+        movl (%esi,%ebp),%ebx
+
+        shrl $8,%ebx
+        andl $0x0fcf8,%eax
+
+        shrb $2,%ah
+        andl $0x0f800,%ebx
+
+        shrl $3,%eax
+
+        orl %ebx,%eax
+
+        movw %ax,(%edi)
+        addl $2,%edi
+
+.L_out:
+
+        popl %edx
+        popl %ebp
+        jmp _X86RETURN_S
+end;
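+
+{ EBP walks the source x coordinate in what looks like 16.16 fixed point:
+  "shrl $14" combined with the $0fffffffc mask yields (x shr 16) * 4, the
+  byte offset of the source pixel, with no separate multiply. One loop step,
+  sketched with illustrative names (x, xinc, src):
+    ofs := (x shr 14) and $FFFFFFFC;  // = (x shr 16) shl 2
+    pixel := PLongWord(src + ofs)^;
+    x := x + xinc; }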

+ 0 - 40
packages/hermes/src/i386/x86pscpy.as

@@ -1,40 +0,0 @@
-
-
-
-.globl _CopyX86p_4byte_S
-.globl _CopyX86p_3byte_S
-.globl _CopyX86p_2byte_S
-.globl _CopyX86p_1byte_S
-
-.extern _x86return
-
-
-.text
-
-## _Copy*
-## Paramters:
-##   ESI = source
-##   EDI = dest
-##   ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though))
-## Destroys:
-##   EAX, EBX, EDX
-
-_CopyX86p_4byte_S:
-
-
-        jmp _x86return_S
-
-
-_CopyX86p_3byte_S:
-
-        jmp _x86return_S
-
-
-
-_CopyX86p_2byte_S:
-
-        jmp _x86return_S
-
-_CopyX86p_1byte_S:
-
-        jmp _x86return_S

+ 60 - 0
packages/hermes/src/i386/x86pscpy.inc

@@ -0,0 +1,60 @@
+{
+    x86 format converters for HERMES
+    Some routines Copyright (c) 1998 Christian Nentwich ([email protected])
+    Some routines are (c) Glenn Fiedler ([email protected]), used with permission
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version
+    with the following modification:
+
+    As a special exception, the copyright holders of this library give you
+    permission to link this library with independent modules to produce an
+    executable, regardless of the license terms of these independent modules, and
+    to copy and distribute the resulting executable under terms of your choice,
+    provided that you also meet, for each linked independent module, the terms
+    and conditions of the license of that module. An independent module is a
+    module which is not derived from or based on this library. If you modify
+    this library, you may extend this exception to your version of the library,
+    but you are not obligated to do so. If you do not wish to do so, delete this
+    exception statement from your version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+}
+
+{ _Copy*
+ Parameters:
+   ESI = source
+   EDI = dest
+   ECX = amount (never 0; the _ConvertX86 routine checks for that)
+ Destroys:
+   EAX, EBX, EDX
+}
+
+procedure CopyX86p_4byte_S(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+        jmp _X86RETURN_S
+end;
+
+procedure CopyX86p_3byte_S(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+        jmp _X86RETURN_S
+end;
+
+procedure CopyX86p_2byte_S(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+        jmp _X86RETURN_S
+end;
+
+procedure CopyX86p_1byte_S(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
+asm
+        jmp _X86RETURN_S
+end;