Browse Source

* updated hermes

git-svn-id: trunk@23002 -
nickysn 12 years ago
parent
commit
7d0b209349

+ 2 - 0
.gitattributes

@@ -3967,6 +3967,8 @@ packages/hermes/src/p_gca.inc svneol=native#text/plain
 packages/hermes/src/p_gcc.inc svneol=native#text/plain
 packages/hermes/src/p_i8.inc svneol=native#text/plain
 packages/hermes/src/p_muhmu.inc svneol=native#text/plain
+packages/hermes/src/x86_64/headx86_64.inc svneol=native#text/plain
+packages/hermes/src/x86_64/x86_64_i8.inc svneol=native#text/plain
 packages/httpd13/Makefile svneol=native#text/plain
 packages/httpd13/Makefile.fpc svneol=native#text/plain
 packages/httpd13/Makefile.fpc.fpcmake svneol=native#text/plain

+ 3 - 0
packages/hermes/Makefile.fpc.fpcmake

@@ -16,6 +16,9 @@ options_i386_go32v2=-dI386_ASSEMBLER
 options_i386_freebsd=-dI386_ASSEMBLER
 options_i386_haiku=-dI386_ASSEMBLER
 options_i386_beos=-dI386_ASSEMBLER
+options_x86_64_linux=-dX86_64_ASSEMBLER
+options_x86_64_win64=-dX86_64_ASSEMBLER
+options_x86_64_freebsd=-dX86_64_ASSEMBLER
 
 unitdir=
 targetdir=.

+ 11 - 1
packages/hermes/src/factconv.inc

@@ -58,10 +58,20 @@ end;
 
 const
   Factory_NumConverters = 45
-  {$IFDEF I386_ASSEMBLER}+27{$ENDIF I386_ASSEMBLER};
+  {$IFDEF I386_ASSEMBLER}+27{$ENDIF I386_ASSEMBLER}
+  {$IFDEF X86_64_ASSEMBLER}+1{$ENDIF X86_64_ASSEMBLER}
+  ;
 
   Factory_Converters: array [0..Factory_NumConverters - 1] of THermesFactoryStruct =
 (
+{$IFDEF X86_64_ASSEMBLER}
+  { ------ From 8 bit INDEXED ------- }
+  (s_bits:8;s_idx:True;s_r:0;s_g:0;s_b:0;s_a:0;
+   d_bits:32;d_idx:False;d_r:0;d_g:0;d_b:0;d_a:0;
+   loopnormal:@ConvertX86_64_index8_32;loopstretch:nil;
+   normal:@NotApplicable;stretch:nil;
+   dither:nil;ditherstretch:nil;processor:PROC_X86_64),
+{$ENDIF X86_64_ASSEMBLER}
 {$IFDEF I386_ASSEMBLER}
   { ------ From 32 RGB 888 - MMX PENTIUM II ---- }
   (s_bits:32;s_idx:False;s_r:$ff0000;s_g:$ff00;s_b:$ff;s_a:0;

+ 5 - 0
packages/hermes/src/hermes.pp

@@ -249,6 +249,8 @@ const
   PROC_GENERIC = 1;
   PROC_X86_PENTIUM = 2;
   PROC_MMX_PENTIUM = 4;
+  PROC_SSE2 = 8;
+  PROC_X86_64 = 16;
   HERMES_CONVERT_GENERIC = 65536;
 
 {$I hermconf.inc}
@@ -446,6 +448,9 @@ var
   {$I i386/headi386.inc}
   {$I i386/headmmx.inc}
 {$ENDIF I386_ASSEMBLER}
+{$IFDEF X86_64_ASSEMBLER}
+  {$I x86_64/headx86_64.inc}
+{$ENDIF X86_64_ASSEMBLER}
 {$I factconv.inc}
 {$I hermes_list.inc}
 {$I hermes_utility.inc}

+ 8 - 2
packages/hermes/src/hermes_factory.inc

@@ -34,10 +34,11 @@ var
   Processor: Integer;
 
 procedure Hermes_Factory_Init;
+{$IFDEF I386_ASSEMBLER}
 var
-  res: Integer;
+  res: Integer = 0;
+{$ENDIF I386_ASSEMBLER}
 begin
-  res := 0;
   Processor := PROC_GENERIC;
 
   {$IFDEF I386_ASSEMBLER}
@@ -50,6 +51,9 @@ begin
 {      Writeln('mmx!');}
     end;
   {$ENDIF I386_ASSEMBLER}
+  {$IFDEF X86_64_ASSEMBLER}
+    Processor := Processor or PROC_X86_64;
+  {$ENDIF X86_64_ASSEMBLER}
 end;
 
 function Hermes_Factory_getClearer(bits: Uint32): PHermesClearer;
@@ -259,7 +263,9 @@ end;
 function Hermes_Factory_getEqualConverter(bits: Integer): PHermesConverter;
 var
   found: Boolean;
+{$IFDEF I386_ASSEMBLER}
   asm_found: Integer;
+{$ENDIF I386_ASSEMBLER}
   c_found: Integer;
 begin
   found := False;

+ 1 - 0
packages/hermes/src/x86_64/headx86_64.inc

@@ -0,0 +1 @@
+{$I x86_64_i8.inc}

+ 122 - 0
packages/hermes/src/x86_64/x86_64_i8.inc

@@ -0,0 +1,122 @@
+{
+    Free Pascal version of the Hermes pixel conversion library.
+    Copyright (C) 2012  Nikolay Nikolov ([email protected])
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version
+    with the following modification:
+
+    As a special exception, the copyright holders of this library give you
+    permission to link this library with independent modules to produce an
+    executable, regardless of the license terms of these independent modules,and
+    to copy and distribute the resulting executable under terms of your choice,
+    provided that you also meet, for each linked independent module, the terms
+    and conditions of the license of that module. An independent module is a
+    module which is not derived from or based on this library. If you modify
+    this library, you may extend this exception to your version of the library,
+    but you are not obligated to do so. If you do not wish to do so, delete this
+    exception statement from your version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+}
+
+{$ASMMODE intel}
+
+{ -------------------------------------------------------------------------
+
+                             NORMAL CONVERTERS
+
+  ------------------------------------------------------------------------- }
+
+{ Converts rows of 8-bit indexed pixels to 32-bit pixels by looking every
+  source byte up in iface^.lookup. Destination pixels are written with
+  non-temporal stores. After each row iface^.s_add / iface^.d_add are added
+  to the source / destination pointers, and iface^.s_height is counted down
+  to 0 while the rows are processed.
+
+  Does nothing when s_width or s_height is not positive. }
+procedure ConvertX86_64_index8_32(iface: PHermesConverterInterface); cdecl;
+label
+  loop_align, loop_aligned, loop_start, loop_pre_remainder, loop_remainder,
+  done;
+var
+  source, dest: PUint8;
+  lookup: PUint32;
+  s_width: int64;
+begin
+  { An empty surface has nothing to convert; rejecting s_height <= 0 also
+    keeps the row countdown below from wrapping around. }
+  if (iface^.s_width <= 0) or (iface^.s_height <= 0) then
+    exit;
+  source := iface^.s_pixels;
+  dest := iface^.d_pixels;
+  lookup := iface^.lookup;
+  s_width := iface^.s_width;
+  repeat
+      { Register roles inside the block:
+          rsi = source pointer        rdi = destination pointer
+          rbx = lookup table          rcx = pixels / 4-pixel blocks left
+          r8  = pixels left over after the 4-pixel blocks
+          rax, r9, r10, r11 = palette indices, edx = single looked-up pixel }
+      asm
+        mov rsi, [source]
+        mov rdi, [dest]
+        mov rbx, [lookup]
+        mov rcx, [s_width]
+
+        { A destination that is not dword aligned can never become 16-byte
+          aligned in 4-byte steps: convert the whole row pixel by pixel. }
+        test rdi, 3
+        jnz loop_remainder
+
+        { movntdq requires a 16-byte aligned address, so emit single pixels
+          until rdi is aligned (rcx > 0 is guaranteed on entry). }
+loop_align:
+        test rdi, 15
+        jz loop_aligned
+        movzx rax, byte [rsi]
+        mov edx, dword [rbx + rax * 4]
+        movnti [rdi], edx
+        inc rsi
+        add rdi, 4
+        sub rcx, 1
+        jnz loop_align
+        jmp done
+
+loop_aligned:
+        { split the remaining pixels into 4-pixel blocks and a 0..3 tail }
+        mov r8, rcx
+        and r8, 3
+        shr rcx, 2
+        jz loop_pre_remainder
+
+        align 16
+loop_start:
+        movzx rax, byte [rsi]
+        movzx r9, byte [rsi + 1]
+        movzx r10, byte [rsi + 2]
+        movzx r11, byte [rsi + 3]
+
+        { gather four looked-up pixels into xmm0, lowest pixel first }
+        movd xmm0, dword [rbx + rax * 4]
+        movd xmm1, dword [rbx + r9 * 4]
+        movd xmm2, dword [rbx + r10 * 4]
+        movd xmm3, dword [rbx + r11 * 4]
+        punpckldq xmm0, xmm1
+        punpckldq xmm2, xmm3
+        punpcklqdq xmm0, xmm2
+
+        movntdq [rdi], xmm0
+        add rsi, 4
+        add rdi, 16
+        sub rcx, 1
+        jnz loop_start
+
+loop_pre_remainder:
+        mov rcx, r8
+        test rcx, rcx
+        jz done
+loop_remainder:
+        movzx rax, byte [rsi]
+        mov edx, dword [rbx + rax * 4]
+        movnti [rdi], edx
+        inc rsi
+        add rdi, 4
+        sub rcx, 1
+        jnz loop_remainder
+
+done:
+        mov [source], rsi
+        mov [dest], rdi
+      { List every integer register the block writes, so the compiler can
+        preserve the callee-saved ones (rbx; on Win64 also rsi and rdi). }
+      end ['rax', 'rbx', 'rcx', 'rdx', 'rsi', 'rdi', 'r8', 'r9', 'r10', 'r11'];
+    Inc(source, iface^.s_add);
+    Inc(dest, iface^.d_add);
+    Dec(iface^.s_height);
+  until iface^.s_height = 0;
+  { fence the non-temporal stores issued above before returning }
+  asm
+    sfence
+  end;
+end;