|
@@ -0,0 +1,140 @@
|
|
|
+{
|
|
|
+ $Id$
|
|
|
+ This file is part of the Free Pascal run time library.
|
|
|
+ Copyright (c) 1999-2000 by the Free Pascal development team
|
|
|
+
|
|
|
+ Processor specific implementation of strlen
|
|
|
+
|
|
|
+ See the file COPYING.FPC, included in this distribution,
|
|
|
+ for details about the copyright.
|
|
|
+
|
|
|
+ This program is distributed in the hope that it will be useful,
|
|
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
+
|
|
|
+ **********************************************************************}
|
|
|
+{
|
|
|
+ Implemented using the code from glibc: libc/sysdeps/x86_64/strlen.S Version 1.2
|
|
|
+}
|
|
|
+asm
|
|
|
+ movq %rdi, %rcx { Duplicate source pointer. }
|
|
|
+ andl $7, %ecx { mask alignment bits }
|
|
|
+ movq %rdi, %rax { duplicate destination. }
|
|
|
+ jz LFPC_STRLEN_1 { aligned => start loop }
|
|
|
+
|
|
|
+ neg %ecx { We need to align to 8 bytes. }
|
|
|
+ addl $8,%ecx
|
|
|
+ { Search the first bytes directly. }
|
|
|
+LFPC_STRLEN_0:
|
|
|
+ cmpb $0x0,(%rax) { is byte NUL? }
|
|
|
+ je LFPC_STRLEN_2 { yes => return }
|
|
|
+ incq %rax { increment pointer }
|
|
|
+ decl %ecx
|
|
|
+ jnz LFPC_STRLEN_0
|
|
|
+
|
|
|
+LFPC_STRLEN_1:
|
|
|
+ movq $0xfefefefefefefeff,%r8 { Save magic. }
|
|
|
+
|
|
|
+ .p2align 4 { Align loop. }
|
|
|
+LFPC_STRLEN_4: { Main Loop is unrolled 4 times. }
|
|
|
+ { First unroll. }
|
|
|
+ movq (%rax), %rcx { get double word (= 8 bytes) in question }
|
|
|
+ addq $8,%rax { adjust pointer for next word }
|
|
|
+ movq %r8, %rdx { magic value }
|
|
|
+ addq %rcx, %rdx { add the magic value to the word. We get
|
|
|
+ carry bits reported for each byte which
|
|
|
+ is *not* 0 }
|
|
|
+ jnc LFPC_STRLEN_3 { highest byte is NUL => return pointer }
|
|
|
+ xorq %rcx, %rdx { (word+magic)^word }
|
|
|
+ orq %r8, %rdx { set all non-carry bits }
|
|
|
+ incq %rdx { add 1: if one carry bit was *not* set
|
|
|
+ the addition will not result in 0. }
|
|
|
+ jnz LFPC_STRLEN_3 { found NUL => return pointer }
|
|
|
+
|
|
|
+ { Second unroll. }
|
|
|
+ movq (%rax), %rcx { get double word (= 8 bytes) in question }
|
|
|
+ addq $8,%rax { adjust pointer for next word }
|
|
|
+ movq %r8, %rdx { magic value }
|
|
|
+ addq %rcx, %rdx { add the magic value to the word. We get
|
|
|
+ carry bits reported for each byte which
|
|
|
+ is *not* 0 }
|
|
|
+ jnc LFPC_STRLEN_3 { highest byte is NUL => return pointer }
|
|
|
+ xorq %rcx, %rdx { (word+magic)^word }
|
|
|
+ orq %r8, %rdx { set all non-carry bits }
|
|
|
+ incq %rdx { add 1: if one carry bit was *not* set
|
|
|
+ the addition will not result in 0. }
|
|
|
+ jnz LFPC_STRLEN_3 { found NUL => return pointer }
|
|
|
+
|
|
|
+ { Third unroll. }
|
|
|
+ movq (%rax), %rcx { get double word (= 8 bytes) in question }
|
|
|
+ addq $8,%rax { adjust pointer for next word }
|
|
|
+ movq %r8, %rdx { magic value }
|
|
|
+ addq %rcx, %rdx { add the magic value to the word. We get
|
|
|
+ carry bits reported for each byte which
|
|
|
+ is *not* 0 }
|
|
|
+ jnc LFPC_STRLEN_3 { highest byte is NUL => return pointer }
|
|
|
+ xorq %rcx, %rdx { (word+magic)^word }
|
|
|
+ orq %r8, %rdx { set all non-carry bits }
|
|
|
+ incq %rdx { add 1: if one carry bit was *not* set
|
|
|
+ the addition will not result in 0. }
|
|
|
+ jnz LFPC_STRLEN_3 { found NUL => return pointer }
|
|
|
+
|
|
|
+ { Fourth unroll. }
|
|
|
+ movq (%rax), %rcx { get double word (= 8 bytes) in question }
|
|
|
+ addq $8,%rax { adjust pointer for next word }
|
|
|
+ movq %r8, %rdx { magic value }
|
|
|
+ addq %rcx, %rdx { add the magic value to the word. We get
|
|
|
+ carry bits reported for each byte which
|
|
|
+ is *not* 0 }
|
|
|
+ jnc LFPC_STRLEN_3 { highest byte is NUL => return pointer }
|
|
|
+ xorq %rcx, %rdx { (word+magic)^word }
|
|
|
+ orq %r8, %rdx { set all non-carry bits }
|
|
|
+ incq %rdx { add 1: if one carry bit was *not* set
|
|
|
+ the addition will not result in 0. }
|
|
|
+ jz LFPC_STRLEN_4 { no NUL found => continue loop }
|
|
|
+
|
|
|
+ .p2align 4 { Align, it's a jump target. }
|
|
|
+LFPC_STRLEN_3:
|
|
|
+ subq $8,%rax { correct pointer increment. }
|
|
|
+
|
|
|
+ testb %cl, %cl { is first byte NUL? }
|
|
|
+ jz LFPC_STRLEN_2 { yes => return }
|
|
|
+ incq %rax { increment pointer }
|
|
|
+
|
|
|
+ testb %ch, %ch { is second byte NUL? }
|
|
|
+ jz LFPC_STRLEN_2 { yes => return }
|
|
|
+ incq %rax { increment pointer }
|
|
|
+
|
|
|
+ testl $0x00ff0000, %ecx { is third byte NUL? }
|
|
|
+ jz LFPC_STRLEN_2 { yes => return pointer }
|
|
|
+ incq %rax { increment pointer }
|
|
|
+
|
|
|
+ testl $0xff000000, %ecx { is fourth byte NUL? }
|
|
|
+ jz LFPC_STRLEN_2 { yes => return pointer }
|
|
|
+ incq %rax { increment pointer }
|
|
|
+
|
|
|
+ shrq $32, %rcx { look at other half. }
|
|
|
+
|
|
|
+ testb %cl, %cl { is first byte NUL? }
|
|
|
+ jz LFPC_STRLEN_2 { yes => return }
|
|
|
+ incq %rax { increment pointer }
|
|
|
+
|
|
|
+ testb %ch, %ch { is second byte NUL? }
|
|
|
+ jz LFPC_STRLEN_2 { yes => return }
|
|
|
+ incq %rax { increment pointer }
|
|
|
+
|
|
|
+ testl $0xff0000, %ecx { is third byte NUL? }
|
|
|
+ jz LFPC_STRLEN_2 { yes => return pointer }
|
|
|
+ incq %rax { increment pointer }
|
|
|
+LFPC_STRLEN_2:
|
|
|
+ subq %rdi, %rax { compute difference to string start }
|
|
|
+ ret
|
|
|
+end;
|
|
|
+
|
|
|
+
|
|
|
+{
|
|
|
+ $Log$
|
|
|
+ Revision 1.1 2003-04-30 16:36:39 florian
|
|
|
+ + support for generic pchar routines added
|
|
|
+ + some basic rtl stuff for x86-64 added
|
|
|
+}
|