17 年之前 · d8bffd27fc
--- a/.gitattributes
+++ b/.gitattributes
@@ -4814,7 +4814,6 @@ rtl/i386/setjump.inc svneol=native#text/plain
 
				 rtl/i386/setjumph.inc svneol=native#text/plain
			
 
				 rtl/i386/strings.inc svneol=native#text/plain
			
 
				 rtl/i386/stringss.inc svneol=native#text/plain
			
 
				-rtl/i386/strlen.inc svneol=native#text/plain
			
 
				 rtl/i386/strpas.inc svneol=native#text/plain
			
 
				 rtl/inc/aliases.inc svneol=native#text/plain
			
 
				 rtl/inc/astrings.inc svneol=native#text/plain
			
--- a/rtl/arm/arm.inc
+++ b/rtl/arm/arm.inc
@@ -307,6 +307,168 @@ end;
 
				 
			
 
				 {$endif FPC_SYSTEM_HAS_MOVE}
			
 
				 
			
 
				+{****************************************************************************
			
 
				+                                 String
			
 
				+****************************************************************************}
			
 
				+
			
 
				+{$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
			
 
				+{$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
			
 
				+
			
 
				+{$ifndef FPC_STRTOSHORTSTRINGPROC}
				+function fpc_shortstr_to_shortstr(len:longint;const sstr:shortstring):shortstring;assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
				+{$else}
				+procedure fpc_shortstr_to_shortstr(out res:shortstring;const sstr:shortstring);assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
				+{$endif}
				+{ Copies the shortstring sstr to the destination shortstring, truncated to
				+  at most len characters.
				+
				+  r0: __RESULT (destination, length byte first)
				+  r1: len      (limit for the number of characters to copy; NOTE(review):
				+                in the procedure variant this is presumably the hidden
				+                high(res) argument - confirm)
				+  r2: sstr     (source, length byte first)
				+
				+  r1 is reused as data scratch once the length has been clamped; r3 and
				+  r12 are scratch as well. r12 holds the number of bytes still to copy. }
				+
				+asm
				+    ldrb r12,[r2],#1           (* r12 = length byte of sstr, r2 -> first char. *)
				+    cmp  r12,r1
				+    (* Clamp the length to len. The limit must end up in r12: it is stored
				+       as length byte and drives all copy loops below. Without the clamp a
				+       source longer than len overruns the destination. *)
				+    movgt r12,r1
				+    strb r12,[r0],#1
				+    cmp  r12,#6 (* 6 seems to be the break even point. *)
				+    blt  .LStartTailCopy
				+    (* Align destination on 32bits. This is the only place where unrolling
				+       really seems to help, since in the common case, sstr is aligned on
				+       32 bits, therefore in the common case we need to copy 3 bytes to
				+       align, i.e. in the case of a loop, you wouldn't branch out early.*)
				+    rsb  r3,r0,#0
				+    ands  r3,r3,#3             (* r3 = bytes needed to align r0 (0..3). *)
				+    sub  r12,r12,r3            (* No S suffix: the flags of the ands stay valid. *)
				+    ldrneb r1,[r2],#1
				+    strneb r1,[r0],#1
				+    subnes  r3,r3,#1
				+    ldrneb r1,[r2],#1
				+    strneb r1,[r0],#1
				+    subnes  r3,r3,#1
				+    ldrneb r1,[r2],#1
				+    strneb r1,[r0],#1
				+    subnes  r3,r3,#1
				+.LDoneAlign:
				+    (* Destination should be aligned now, but source might not be aligned,
				+       if this is the case, do a byte-per-byte copy. *)
				+    tst r2,#3
				+    bne .LStartTailCopy
				+    (* Start the main copy, 32 bit at a time. *)
				+    movs r3,r12,lsr #2         (* r3 = number of whole words. *)
				+    and r12,r12,#3             (* r12 = bytes left for the tail; flags untouched. *)
				+    beq  .LStartTailCopy
				+.LNext4bytes:
				+    (* Unrolling this loop would save a little bit of time for long strings
				+       (>20 chars), but alas, it hurts for short strings and they are the
				+       common case.*)
				+    ldrne r1,[r2],#4
				+    strne r1,[r0],#4
				+    subnes  r3,r3,#1
				+    bne .LNext4bytes
				+.LStartTailCopy:
				+    (* Do remaining bytes. *)
				+    cmp r12,#0
				+    beq .LDoneTail
				+.LNextChar3:
				+    ldrb r1,[r2],#1
				+    strb r1,[r0],#1
				+    subs  r12,r12,#1
				+    bne .LNextChar3
				+.LDoneTail:
				+end;
			
 
				+
			
 
				+procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);assembler;nostackframe;[public,alias:'FPC_SHORTSTR_ASSIGN'];compilerproc;
				+
				+{ Copies the shortstring at sstr to dstr, truncated to at most len
				+  characters.
				+
				+  r0: len  (limit for the number of characters to copy)
				+  r1: sstr (source, length byte first)
				+  r2: dstr (destination, length byte first)
				+
				+  r0 is reused as data scratch once the length has been clamped; r3 and
				+  r12 are scratch as well. r12 holds the number of bytes still to copy. }
				+
				+asm
				+    ldrb r12,[r1],#1           (* r12 = length byte of the source, r1 -> first char. *)
				+    cmp  r12,r0
				+    (* Clamp the length to len. The limit must end up in r12: it is stored
				+       as length byte and drives all copy loops below. Without the clamp a
				+       source longer than len overruns the destination. *)
				+    movgt r12,r0
				+    strb r12,[r2],#1
				+    cmp  r12,#6 (* 6 seems to be the break even point. *)
				+    blt  .LStartTailCopy
				+    (* Align destination on 32bits. This is the only place where unrolling
				+       really seems to help, since in the common case, sstr is aligned on
				+       32 bits, therefore in the common case we need to copy 3 bytes to
				+       align, i.e. in the case of a loop, you wouldn't branch out early.*)
				+    rsb  r3,r2,#0
				+    ands  r3,r3,#3             (* r3 = bytes needed to align r2 (0..3). *)
				+    sub  r12,r12,r3            (* No S suffix: the flags of the ands stay valid. *)
				+    ldrneb r0,[r1],#1
				+    strneb r0,[r2],#1
				+    subnes  r3,r3,#1
				+    ldrneb r0,[r1],#1
				+    strneb r0,[r2],#1
				+    subnes  r3,r3,#1
				+    ldrneb r0,[r1],#1
				+    strneb r0,[r2],#1
				+    subnes  r3,r3,#1
				+.LDoneAlign:
				+    (* Destination should be aligned now, but source might not be aligned,
				+       if this is the case, do a byte-per-byte copy. *)
				+    tst r1,#3
				+    bne .LStartTailCopy
				+    (* Start the main copy, 32 bit at a time. *)
				+    movs r3,r12,lsr #2         (* r3 = number of whole words. *)
				+    and r12,r12,#3             (* r12 = bytes left for the tail; flags untouched. *)
				+    beq  .LStartTailCopy
				+.LNext4bytes:
				+    (* Unrolling this loop would save a little bit of time for long strings
				+       (>20 chars), but alas, it hurts for short strings and they are the
				+       common case.*)
				+    ldrne r0,[r1],#4
				+    strne r0,[r2],#4
				+    subnes  r3,r3,#1
				+    bne .LNext4bytes
				+.LStartTailCopy:
				+    (* Do remaining bytes. *)
				+    cmp r12,#0
				+    beq .LDoneTail
				+.LNextChar3:
				+    ldrb r0,[r1],#1
				+    strb r0,[r2],#1
				+    subs  r12,r12,#1
				+    bne .LNextChar3
				+.LDoneTail:
				+end;
			
 
				+{$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
			
 
				+
			
 
				+{$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
			
 
				+{$define FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
			
 
				+function fpc_Pchar_length(p:Pchar):longint;assembler;nostackframe;[public,alias:'FPC_PCHAR_LENGTH'];compilerproc;
			
 
				+
			
 
				+{ Returns the number of characters that precede the terminating #0 of p.
			
 
				+  r0 is used as start address and receives the result, r1 walks the string,
			
 
				+  r2, r3 and r12 are scratch. NOTE(review): r0 = p assumes the usual ARM
			
 
				+  register argument passing - confirm.
			
 
				+  Unaligned addresses are scanned per byte, aligned ones per 32-bit word. }
			
 
				+
			
 
				+asm
			
 
				+    mov r1,r0               (*r1 = scan pointer, r0 keeps the start address.*)
			
 
				+.Lnextchar:
			
 
				+    (*Are we aligned?*)
			
 
				+    tst r1,#3
			
 
				+    bne .Ltest_unaligned    (*No, do byte per byte.*)
			
 
				+    ldr r3,.L01010101       (*r3 = $01010101, read from the literal below the code.*)
			
 
				+.Ltest_aligned:
			
 
				+    (*Aligned, load 4 bytes at a time.*)
			
 
				+    ldr r12,[r1],#4
			
 
				+    (*Check whether r12 contains a 0 byte:
			
 
				+      (word - $01010101) and (not word) and $80808080 is non-zero if it does.*)
			
 
				+    sub r2,r12,r3
			
 
				+    mvn r12,r12
			
 
				+    and r2,r2,r12
			
 
				+    ands r2,r2,r3,lsl #7    (*r3 lsl 7 = $80808080*)
			
 
				+    beq .Ltest_aligned      (*No 0 byte, repeat.*)
			
 
				+    sub r1,r1,#4            (*Step back to the start of the word, find the 0 per byte.*)
			
 
				+.Ltest_unaligned:
			
 
				+    ldrb r12,[r1],#1
			
 
				+    cmp r12,#1              (*r12<1 same as r12=0, but result in carry flag*)
			
 
				+    bcs .Lnextchar          (*Carry set: the byte was not 0, keep scanning.*)
			
 
				+    (*Dirty trick: we need to subtract 1 extra because we have counted the
			
 
				+      terminating 0, due to the known carry flag sbc can do this.*)
			
 
				+    sbc r0,r1,r0
			
 
				+    mov pc,lr               (*Explicit return, the literal follows directly.*)
			
 
				+.L01010101:
			
 
				+    .long 0x01010101
			
 
				+end;
			
 
				+{$endif}
			
 
				+
			
 
				+
			
 
				 var
			
 
				   fpc_system_lock: longint; export name 'fpc_system_lock';
			
 
				 
			
@@ -439,3 +601,4 @@ end;
 
				 
			
 
				 {include hand-optimized assembler division code}
			
 
				 {$i divide.inc}
			
 
				+
			
--- a/rtl/i386/i386.inc
+++ b/rtl/i386/i386.inc
@@ -1013,7 +1013,25 @@ end;
 
				 {$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
			
 
				 {$define FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
			
 
				 function fpc_pchar_length(p:pchar):longint;assembler;[public,alias:'FPC_PCHAR_LENGTH']; compilerproc;
			
 
				-{$include strlen.inc}
			
 
				+
			
 
				+{ Returns the length of the zero-terminated string p by scanning it with
			
 
				+  repne scasb. edi is saved in the local saveedi and restored before exit. }
			
 
				+var
			
 
				+  saveedi : longint;
			
 
				+asm
			
 
				+        movl    %edi,saveedi            { Preserve edi, it serves as scan pointer. }
			
 
				+{$ifdef REGCALL}
			
 
				+        movl    %eax,%edi               { With REGCALL p is taken from eax. }
			
 
				+{$else}
			
 
				+        movl    p,%edi
			
 
				+{$endif}
			
 
				+        movl    $0xffffffff,%ecx        { ecx = -1: maximum repeat count for the scan. }
			
 
				+        xorl    %eax,%eax               { al = 0, the byte to look for. }
			
 
				+        cld                             { Scan towards higher addresses. }
			
 
				+        repne
			
 
				+        scasb
			
 
				+        { ecx went down by one per scanned byte, terminator included, so
			
 
				+          the length is 0xfffffffe - ecx. }
			
 
				+        movl    $0xfffffffe,%eax
			
 
				+        subl    %ecx,%eax
			
 
				+        movl    saveedi,%edi
			
 
				+end;
			
 
				 {$endif FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
			
 
				 
			
 
				 {$IFNDEF INTERNAL_BACKTRACE}
			
@@ -1073,7 +1091,79 @@ Function Sptr : Pointer;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$end
 
				 asm
			
 
				         movl    %esp,%eax
			
 
				 end;
			
 
				+{****************************************************************************
			
 
				+                                 Str()
			
 
				+****************************************************************************}
			
 
				+
			
 
				+{$define FPC_SYSTEM_HAS_INT_STR_LONGWORD}
			
 
				+{$define FPC_SYSTEM_HAS_INT_STR_LONGINT}
			
 
				+
			
 
				+label str_int_shortcut;
			
 
				+
			
 
				+{$asmmode intel}
			
 
				+
			
 
				+procedure int_str(l:longword;var s:string);assembler;nostackframe;
			
 
				+
			
 
				+{ Unsigned variant: sets up the registers the way the digit conversion of
			
 
				+  the longint variant expects them and jumps to its str_int_shortcut label.
			
 
				+  NOTE(review): assumes l arrives in eax and the address of s in edx
			
 
				+  (register calling convention) - confirm. }
			
 
				+asm
			
 
				+  push edi             {edi and ebx are popped at the end of the longint variant.}
			
 
				+  push ebx
			
 
				+  mov edi,edx          {edi = address of s}
			
 
				+  xor edx,edx          {edx = 0: no sign character.}
			
 
				+  jmp str_int_shortcut
			
 
				+end;
			
 
				+
			
 
				+procedure int_str(l:longint;var s:string);assembler;nostackframe;
				+
				+{Optimized for speed, but balanced with size.
				+
				+ Writes the decimal representation of l to s, with a leading '-' for
				+ negative values. The longword variant jumps to str_int_shortcut with
				+ eax = value, edx = 0, edi = address of s and edi/ebx already pushed.
				+ NOTE(review): assumes l arrives in eax and the address of s in edx
				+ (register calling convention) - confirm.
				+
				+ digits[i] is the smallest value that has i+1 digits (digits[0] = 0 so that
				+ every value, 0 included, has at least one digit).}
				+
				+const digits:array[0..9] of cardinal=(0,10,100,1000,10000,
				+                                      100000,1000000,10000000,
				+                                      100000000,1000000000);
				+
				+asm
				+  push edi
				+  push ebx
				+  mov edi,edx
				+
				+  { Calculate absolute value and put sign in edx}
				+  cdq
				+  xor eax,edx
				+  sub eax,edx
				+  neg edx              {edx = 1 for negative values, 0 otherwise.}
				+str_int_shortcut:
				+  {Calculate amount of digits in ecx.
				+   bsr leaves its destination undefined for a zero source, so scan
				+   eax or 1 instead: same highest set bit for every non-zero value, and
				+   bit 0 for eax = 0, which yields one digit below.}
				+  mov ecx,eax
				+  or ecx,1
				+  bsr ecx,ecx
				+  inc ecx              {ecx = number of significant bits (1..32).}
				+  imul ecx,1233        {1233/4096 approximates log10(2): estimate of digits-1.}
				+  shr ecx,12
				+  cmp eax,[digits+4*ecx]
				+  cmc                  {Carry = 1 when eax >= digits[ecx].}
				+  adc ecx,0 {Nr. digits ready in ecx.}
				+
				+  {Write length & sign.}
				+  lea ebx,[edx+ecx]    {bl = digits + sign = length of the string.}
				+  mov bh,'-'
				+  mov [edi],bx         {For non-negative values the '-' is overwritten by the first digit.}
				+  add edi,edx          {Skip the sign, digit i goes to [edi+i].}
				+
				+  {Write out digits.}
				+  mov edx,eax
				+@loop:
				+  mov eax,$cccccccd    {Divide by 10 using mul+shr}
				+  lea ebx,[edx+'0']    {Pre-add '0'}
				+  mul edx
				+  shr edx,3
				+  lea eax,[8*edx+edx]  {x mod 10 = x-10*(x div 10)}
				+  sub ebx,edx
				+  sub ebx,eax
				+  mov [edi+ecx],bl
				+  dec ecx
				+  jnz @loop
				+  pop ebx
				+  pop edi
				+end;
			
 
				 
			
 
				+{$asmmode att}
			
 
				 
			
 
				 {****************************************************************************
			
 
				                                Bounds Check
			
--- a/rtl/i386/strlen.inc
+++ b/rtl/i386/strlen.inc
@@ -1,35 +0,0 @@
 
				-{
			
 
				-    This file is part of the Free Pascal run time library.
			
 
				-    Copyright (c) 1999-2000 by the Free Pascal development team
			
 
				-
			
 
				-    Processor specific implementation of strlen
			
 
				-
			
 
				-    See the file COPYING.FPC, included in this distribution,
			
 
				-    for details about the copyright.
			
 
				-
			
 
				-    This program is distributed in the hope that it will be useful,
			
 
				-    but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				-
			
 
				- **********************************************************************}
			
 
				-
			
 
				-var
			
 
				-  saveedi : longint;
			
 
				-asm
			
 
				-        movl    %edi,saveedi
			
 
				-{$ifdef REGCALL}
			
 
				-        movl    %eax,%edi
			
 
				-{$else}
			
 
				-        movl    p,%edi
			
 
				-{$endif}
			
 
				-        movl    $0xffffffff,%ecx
			
 
				-        xorl    %eax,%eax
			
 
				-        cld
			
 
				-        repne
			
 
				-        scasb
			
 
				-        movl    $0xfffffffe,%eax
			
 
				-        subl    %ecx,%eax
			
 
				-        movl    saveedi,%edi
			
 
				-end;
			
 
				-
			
 
				-