Browse Source

- Integrate i386/strlen.inc and remove it.
+ int_str assembler implementations for i386
+ fpc_shortstr_to_shortstr assembler implementation for ARM
+ fpc_shortstr_assign assembler implementation for ARM
+ fpc_Pchar_length assembler implementation for ARM

git-svn-id: trunk@9582 -

daniel 17 years ago
parent
commit
d8bffd27fc
4 changed files with 254 additions and 37 deletions
  1. 0 1
      .gitattributes
  2. 163 0
      rtl/arm/arm.inc
  3. 91 1
      rtl/i386/i386.inc
  4. 0 35
      rtl/i386/strlen.inc

+ 0 - 1
.gitattributes

@@ -4814,7 +4814,6 @@ rtl/i386/setjump.inc svneol=native#text/plain
 rtl/i386/setjumph.inc svneol=native#text/plain
 rtl/i386/strings.inc svneol=native#text/plain
 rtl/i386/stringss.inc svneol=native#text/plain
-rtl/i386/strlen.inc svneol=native#text/plain
 rtl/i386/strpas.inc svneol=native#text/plain
 rtl/inc/aliases.inc svneol=native#text/plain
 rtl/inc/astrings.inc svneol=native#text/plain

+ 163 - 0
rtl/arm/arm.inc

@@ -307,6 +307,168 @@ end;
 
 {$endif FPC_SYSTEM_HAS_MOVE}
 
+{****************************************************************************
+                                 String
+****************************************************************************}
+
+{$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
+{$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
+
+{$ifndef FPC_STRTOSHORTSTRINGPROC}
+function fpc_shortstr_to_shortstr(len:longint;const sstr:shortstring):shortstring;assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
+{$else}
+procedure fpc_shortstr_to_shortstr(out res:shortstring;const sstr:shortstring);assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
+{$endif}
+{Copies the shortstring sstr to the result, truncated to at most len
+ characters: first the (clamped) length byte is stored, then the characters
+ are copied, word-wise where source and destination alignment allow it.
+
+ r0: __RESULT
+ r1: len
+ r2: sstr
+
+ NOTE(review): in the procedure variant r1 is presumably the hidden
+ high(res) parameter - confirm against the compiler.
+ r1 is reused as a scratch register once the length has been clamped; r3 and
+ r12 are scratch as well.}
+
+asm
+    ldrb r12,[r2],#1    (* r12 = length of sstr, r2 now points at its first char *)
+    cmp  r12,r1
+    movgt r12,r1        (* Clamp the length to len, never copy more than that. *)
+    strb r12,[r0],#1    (* Store the length byte of the result. *)
+    cmp  r12,#6 (* 6 seems to be the break even point. *)
+    blt  .LStartTailCopy
+    (* Align destination on 32bits. This is the only place where unrolling
+       really seems to help, since in the common case, sstr is aligned on
+       32 bits, therefore in the common case we need to copy 3 bytes to
+       align, i.e. in the case of a loop, you wouldn't branch out early.*)
+    rsb  r3,r0,#0
+    ands  r3,r3,#3      (* r3 = number of bytes (0..3) needed to align r0 *)
+    sub  r12,r12,r3     (* Does not touch the flags set by the ands above. *)
+    ldrneb r1,[r2],#1
+    strneb r1,[r0],#1
+    subnes  r3,r3,#1    (* Once r3 reaches 0 the remaining copies are skipped. *)
+    ldrneb r1,[r2],#1
+    strneb r1,[r0],#1
+    subnes  r3,r3,#1
+    ldrneb r1,[r2],#1
+    strneb r1,[r0],#1
+    subnes  r3,r3,#1
+.LDoneAlign:
+    (* Destination should be aligned now, but source might not be aligned,
+       if this is the case, do a byte-per-byte copy. *)
+    tst r2,#3
+    bne .LStartTailCopy
+    (* Start the main copy, 32 bit at a time. *)
+    movs r3,r12,lsr #2  (* r3 = number of whole words to copy *)
+    and r12,r12,#3      (* r12 = bytes left over for the tail copy *)
+    beq  .LStartTailCopy
+.LNext4bytes:
+    (* Unrolling this loop would save a little bit of time for long strings
+       (>20 chars), but alas, it hurts for short strings and they are the
+       common case.*)
+    ldrne r1,[r2],#4
+    strne r1,[r0],#4
+    subnes  r3,r3,#1
+    bne .LNext4bytes
+.LStartTailCopy:
+    (* Do remaining bytes. *)
+    cmp r12,#0
+    beq .LDoneTail
+.LNextChar3:
+    ldrb r1,[r2],#1
+    strb r1,[r0],#1
+    subs  r12,r12,#1
+    bne .LNextChar3
+.LDoneTail:
+end;
+
+procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);assembler;nostackframe;[public,alias:'FPC_SHORTSTR_ASSIGN'];compilerproc;
+
+{Copies the shortstring at sstr to dstr, truncated to at most len
+ characters: first the (clamped) length byte is stored, then the characters
+ are copied, word-wise where source and destination alignment allow it.
+
+ r0: len
+ r1: sstr
+ r2: dstr
+
+ r0 is reused as a scratch register once the length has been clamped; r3 and
+ r12 are scratch as well.}
+
+asm
+    ldrb r12,[r1],#1    (* r12 = length of sstr, r1 now points at its first char *)
+    cmp  r12,r0
+    movgt r12,r0        (* Clamp the length to len, never copy more than that. *)
+    strb r12,[r2],#1    (* Store the length byte of the destination. *)
+    cmp  r12,#6 (* 6 seems to be the break even point. *)
+    blt  .LStartTailCopy
+    (* Align destination on 32bits. This is the only place where unrolling
+       really seems to help, since in the common case, sstr is aligned on
+       32 bits, therefore in the common case we need to copy 3 bytes to
+       align, i.e. in the case of a loop, you wouldn't branch out early.*)
+    rsb  r3,r2,#0
+    ands  r3,r3,#3      (* r3 = number of bytes (0..3) needed to align r2 *)
+    sub  r12,r12,r3     (* Does not touch the flags set by the ands above. *)
+    ldrneb r0,[r1],#1
+    strneb r0,[r2],#1
+    subnes  r3,r3,#1    (* Once r3 reaches 0 the remaining copies are skipped. *)
+    ldrneb r0,[r1],#1
+    strneb r0,[r2],#1
+    subnes  r3,r3,#1
+    ldrneb r0,[r1],#1
+    strneb r0,[r2],#1
+    subnes  r3,r3,#1
+.LDoneAlign:
+    (* Destination should be aligned now, but source might not be aligned,
+       if this is the case, do a byte-per-byte copy. *)
+    tst r1,#3
+    bne .LStartTailCopy
+    (* Start the main copy, 32 bit at a time. *)
+    movs r3,r12,lsr #2  (* r3 = number of whole words to copy *)
+    and r12,r12,#3      (* r12 = bytes left over for the tail copy *)
+    beq  .LStartTailCopy
+.LNext4bytes:
+    (* Unrolling this loop would save a little bit of time for long strings
+       (>20 chars), but alas, it hurts for short strings and they are the
+       common case.*)
+    ldrne r0,[r1],#4
+    strne r0,[r2],#4
+    subnes  r3,r3,#1
+    bne .LNext4bytes
+.LStartTailCopy:
+    (* Do remaining bytes. *)
+    cmp r12,#0
+    beq .LDoneTail
+.LNextChar3:
+    ldrb r0,[r1],#1
+    strb r0,[r2],#1
+    subs  r12,r12,#1
+    bne .LNextChar3
+.LDoneTail:
+end;
+{$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
+
+{$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
+{$define FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
+function fpc_Pchar_length(p:Pchar):longint;assembler;nostackframe;[public,alias:'FPC_PCHAR_LENGTH'];compilerproc;
+
+{Returns the number of characters before the terminating 0 of p.
+ On entry r0 holds p, on exit r0 holds the length; r1 is the running
+ pointer, r2, r3 and r12 are scratch.
+ While the running pointer is word aligned, 4 bytes are tested per
+ iteration, otherwise a single byte is tested.}
+
+asm
+    mov r1,r0               (*r1 = scan pointer, r0 keeps the start address*)
+.Lnextchar:
+    (*Are we aligned?*)
+    tst r1,#3
+    bne .Ltest_unaligned    (*No, do byte per byte.*)
+    ldr r3,.L01010101       (*r3 = $01010101, constant stored behind the code*)
+.Ltest_aligned:
+    (*Aligned, load 4 bytes at a time.*)
+    ldr r12,[r1],#4
+    (*Check whether r12 contains a 0 byte:
+      (r12 - $01010101) and (not r12) and $80808080 is non-zero in that case.*)
+    sub r2,r12,r3
+    mvn r12,r12
+    and r2,r2,r12
+    ands r2,r2,r3,lsl #7    (*r3 lsl 7 = $80808080*)
+    beq .Ltest_aligned      (*No 0 byte, repeat.*)
+    sub r1,r1,#4            (*Step back and rescan this word byte per byte.*)
+.Ltest_unaligned:
+    ldrb r12,[r1],#1
+    cmp r12,#1              (*r12<1 same as r12=0, but result in carry flag*)
+    bcs .Lnextchar          (*Carry set: byte was not 0, continue scanning.*)
+    (*Dirty trick: we need to subtract 1 extra because we have counted the
+      terminating 0, due to the known carry flag sbc can do this.*)
+    sbc r0,r1,r0            (*Carry is clear here, so r0 = r1 - r0 - 1.*)
+    mov pc,lr               (*Explicit return, the literal below must not be executed.*)
+.L01010101:
+    .long 0x01010101
+end;
+{$endif}
+
+
 var
   fpc_system_lock: longint; export name 'fpc_system_lock';
 
@@ -439,3 +601,4 @@ end;
 
 {include hand-optimized assembler division code}
 {$i divide.inc}
+

+ 91 - 1
rtl/i386/i386.inc

@@ -1013,7 +1013,25 @@ end;
 {$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
 {$define FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
 function fpc_pchar_length(p:pchar):longint;assembler;[public,alias:'FPC_PCHAR_LENGTH']; compilerproc;
-{$include strlen.inc}
+
+{ Returns the number of characters before the terminating 0 of p, found with
+  a repne scasb scan. edi is saved in a local and restored before leaving. }
+var
+  saveedi : longint;
+asm
+        movl    %edi,saveedi            { keep the caller's edi }
+{$ifdef REGCALL}
+        movl    %eax,%edi               { p arrives in eax }
+{$else}
+        movl    p,%edi
+{$endif}
+        movl    $0xffffffff,%ecx        { no limit on the scan length }
+        xorl    %eax,%eax               { al = 0, the byte to search for }
+        cld                             { scan upwards }
+        repne
+        scasb                           { ecx is decremented for every byte, terminator included }
+        movl    $0xfffffffe,%eax
+        subl    %ecx,%eax               { eax = -2 - ecx = length without the terminator }
+        movl    saveedi,%edi            { restore the caller's edi }
+end;
 {$endif FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
 
 {$IFNDEF INTERNAL_BACKTRACE}
@@ -1073,7 +1091,79 @@ Function Sptr : Pointer;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$end
 asm
         movl    %esp,%eax
 end;
+{****************************************************************************
+                                 Str()
+****************************************************************************}
+
+{$define FPC_SYSTEM_HAS_INT_STR_LONGWORD}
+{$define FPC_SYSTEM_HAS_INT_STR_LONGINT}
+
+label str_int_shortcut;
+
+{$asmmode intel}
+
+procedure int_str(l:longword;var s:string);assembler;nostackframe;
+
+{Unsigned variant of int_str. It only sets up the registers the way the
+ signed variant has them at the label str_int_shortcut and continues there:
+ edi and ebx pushed (popped again by the shared code), edi = address of s,
+ eax = value, edx = 0 (no sign).}
+
+asm
+  push edi
+  push ebx
+  mov edi,edx             {edi = address of s}
+  xor edx,edx             {edx = 0: no '-' is written}
+  jmp str_int_shortcut    {Continue in the shared conversion code.}
+end;
+
+procedure int_str(l:longint;var s:string);assembler;nostackframe;
+
+{Converts l to its decimal representation and stores it in s.
+ Optimized for speed, but balanced with size.
+
+ On entry eax holds l and edx the address of s. The longword overload jumps
+ to str_int_shortcut with eax = value, edx = 0, edi = address of s and
+ edi/ebx already pushed, so the code from that label on is shared.
+
+ NOTE(review): the number of characters written is not checked against the
+ capacity of s here - confirm that all callers pass a large enough string.}
+
+const digits:array[0..9] of cardinal=(0,10,100,1000,10000,
+                                      100000,1000000,10000000,
+                                      100000000,1000000000);
+
+asm
+  push edi
+  push ebx
+  mov edi,edx
+
+  { Calculate absolute value and put sign in edx}
+  cdq
+  xor eax,edx
+  sub eax,edx
+  neg edx
+str_int_shortcut:
+  {Calculate amount of digits in ecx.
+   bsr leaves its destination undefined for a zero source, therefore scan
+   (eax or 1): this does not change the highest set bit of a non-zero value
+   and yields bit 0, i.e. one digit, for eax=0.}
+  mov ecx,eax
+  or ecx,1
+  bsr ecx,ecx
+  inc ecx              {ecx = number of significant bits}
+  imul ecx,1233        {1233/4096 approximates log10(2)}
+  shr ecx,12           {ecx = estimate, either exact or one too low}
+  cmp eax,[digits+4*ecx]
+  cmc                  {Carry set if eax>=digits[ecx]: one more digit needed.}
+  adc ecx,0 {Nr. digits ready in ecx.}
+
+  {Write length & sign. The '-' is always stored at s[1]; for non-negative
+   numbers it is overwritten by the first digit.}
+  lea ebx,[edx+ecx]
+  mov bh,'-'
+  mov [edi],bx
+  add edi,edx          {Skip the sign, if any.}
+
+  {Write out digits, last digit first, at edi+ecx down to edi+1.}
+  mov edx,eax
+@loop:
+  mov eax,$cccccccd    {Divide by 10 using mul+shr}
+  lea ebx,[edx+'0']    {Pre-add '0'}
+  mul edx
+  shr edx,3
+  lea eax,[8*edx+edx]  {x mod 10 = x-10*(x div 10)}
+  sub ebx,edx
+  sub ebx,eax
+  mov [edi+ecx],bl
+  dec ecx
+  jnz @loop
+  pop ebx
+  pop edi
+end;
 
+{$asmmode att}
 
 {****************************************************************************
                                Bounds Check

+ 0 - 35
rtl/i386/strlen.inc

@@ -1,35 +0,0 @@
-{
-    This file is part of the Free Pascal run time library.
-    Copyright (c) 1999-2000 by the Free Pascal development team
-
-    Processor specific implementation of strlen
-
-    See the file COPYING.FPC, included in this distribution,
-    for details about the copyright.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-
- **********************************************************************}
-
-var
-  saveedi : longint;
-asm
-        movl    %edi,saveedi
-{$ifdef REGCALL}
-        movl    %eax,%edi
-{$else}
-        movl    p,%edi
-{$endif}
-        movl    $0xffffffff,%ecx
-        xorl    %eax,%eax
-        cld
-        repne
-        scasb
-        movl    $0xfffffffe,%eax
-        subl    %ecx,%eax
-        movl    saveedi,%edi
-end;
-
-