strlen.inc 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
{
    $Id$
    This file is part of the Free Pascal run time library.
    Copyright (c) 1999-2000 by the Free Pascal development team

    Processor specific implementation of strlen

    See the file COPYING.FPC, included in this distribution,
    for details about the copyright.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

 **********************************************************************}
{
  Implemented using the code from glibc: libc/sysdeps/x86_64/strlen.S Version 1.2
}
  asm
        {
          Body of the x86-64 strlen routine, written in AT&T operand order
          (source first, destination last).

          In:    %rdi = address of the first character. It is dereferenced
                 without a nil check, so it must point to a NUL-terminated
                 string.
          Out:   %rax = number of bytes that precede the first NUL byte.
          Uses:  %rcx, %rdx, %r8 and the flags. %rdi is only read.
          Exit:  the block returns through its own ret instruction.

          NOTE(review): taking the argument in %rdi matches the System V
          AMD64 convention - confirm against the enclosing declaration,
          which is not part of this include file.

          Method:
            1. Head: compare single bytes until the pointer is a multiple
               of 8.
            2. Main loop: load one aligned 8-byte word per step (unrolled
               four times) and test all eight bytes at once with the carry
               chain described below. The word that holds the terminator
               is read in full, so up to 7 bytes behind the NUL are loaded
               but never interpreted.
            3. Tail: walk the bytes of the word that failed the test to
               find the exact position of the NUL.

          Carry chain: the magic constant is 0xff in its lowest byte and
          0xfe in every other byte. Adding it to a word makes the lowest
          byte carry out exactly when that byte is non-zero, and every
          higher byte carries out when it is non-zero and received a carry
          from the byte below it. The chain therefore runs all the way to
          the top, and sets CF, only when no byte of the word is zero.
          CF covers the carry out of the top byte. The carries into bytes
          1..7 are recovered as bits 8, 16, .. 56 of (word+magic) xor word,
          because the magic constant is 0 at exactly those bit positions.
          OR-ing the magic constant in fills every other bit with 1, so the
          value is all ones, and wraps to 0 on increment, exactly when all
          seven carries were present.
        }
        movq %rdi, %rcx                 { copy of the start address for the alignment test }
        andl $7, %ecx                   { ecx = address mod 8, ZF set when already aligned }
        movq %rdi, %rax                 { rax = scan pointer; movq leaves the flags untouched }
        jz LFPC_STRLEN_1                { aligned => go straight to the word loop }
        neg %ecx                        { ecx = 8 - (address mod 8): }
        addl $8,%ecx                    {   number of bytes up to the next 8-byte boundary }

        { Head: check the unaligned leading bytes one at a time. }
  LFPC_STRLEN_0:
        cmpb $0x0,(%rax)                { NUL at the scan pointer? }
        je LFPC_STRLEN_2                { yes => rax already points at the terminator }
        incq %rax                       { next byte }
        decl %ecx                       { one byte less until the boundary }
        jnz LFPC_STRLEN_0

  LFPC_STRLEN_1:
        movq $0xfefefefefefefeff,%r8    { r8 = magic constant, kept for the whole loop }

        .p2align 4                      { align the loop entry }
  LFPC_STRLEN_4:                        { main loop, unrolled four times }

        { First unroll. }
        movq (%rax), %rcx               { rcx = next aligned group of 8 bytes }
        addq $8,%rax                    { advance past it; undone at LFPC_STRLEN_3 }
        movq %r8, %rdx                  { rdx = magic }
        addq %rcx, %rdx                 { rdx = word + magic, CF = carry out of the top byte }
        jnc LFPC_STRLEN_3               { chain broke somewhere => the word holds a NUL }
        xorq %rcx, %rdx                 { bits 8,16,..,56 = carry into bytes 1..7 }
        orq %r8, %rdx                   { force every other bit to 1 }
        incq %rdx                       { wraps to 0 only when all carries were set }
        jnz LFPC_STRLEN_3               { a carry is missing => the word holds a NUL }

        { Second unroll: same test on the following word. }
        movq (%rax), %rcx               { rcx = next aligned group of 8 bytes }
        addq $8,%rax                    { advance past it }
        movq %r8, %rdx                  { rdx = magic }
        addq %rcx, %rdx                 { rdx = word + magic, CF = carry out of the top byte }
        jnc LFPC_STRLEN_3               { chain broke somewhere => the word holds a NUL }
        xorq %rcx, %rdx                 { bits 8,16,..,56 = carry into bytes 1..7 }
        orq %r8, %rdx                   { force every other bit to 1 }
        incq %rdx                       { wraps to 0 only when all carries were set }
        jnz LFPC_STRLEN_3               { a carry is missing => the word holds a NUL }

        { Third unroll: same test on the following word. }
        movq (%rax), %rcx               { rcx = next aligned group of 8 bytes }
        addq $8,%rax                    { advance past it }
        movq %r8, %rdx                  { rdx = magic }
        addq %rcx, %rdx                 { rdx = word + magic, CF = carry out of the top byte }
        jnc LFPC_STRLEN_3               { chain broke somewhere => the word holds a NUL }
        xorq %rcx, %rdx                 { bits 8,16,..,56 = carry into bytes 1..7 }
        orq %r8, %rdx                   { force every other bit to 1 }
        incq %rdx                       { wraps to 0 only when all carries were set }
        jnz LFPC_STRLEN_3               { a carry is missing => the word holds a NUL }

        { Fourth unroll: same test, but the branch sense is inverted so
          that a clean word closes the loop. }
        movq (%rax), %rcx               { rcx = next aligned group of 8 bytes }
        addq $8,%rax                    { advance past it }
        movq %r8, %rdx                  { rdx = magic }
        addq %rcx, %rdx                 { rdx = word + magic, CF = carry out of the top byte }
        jnc LFPC_STRLEN_3               { chain broke somewhere => the word holds a NUL }
        xorq %rcx, %rdx                 { bits 8,16,..,56 = carry into bytes 1..7 }
        orq %r8, %rdx                   { force every other bit to 1 }
        incq %rdx                       { wraps to 0 only when all carries were set }
        jz LFPC_STRLEN_4                { no NUL in this word => next round }

        { Tail: rcx still holds the word that contains a NUL and rax points
          just behind that word. Step rax to the first zero byte, lowest
          address (least significant byte) first. }
        .p2align 4                      { align, it is a jump target }
  LFPC_STRLEN_3:
        subq $8,%rax                    { back to the first byte of the word }
        testb %cl, %cl                  { byte 0 }
        jz LFPC_STRLEN_2
        incq %rax
        testb %ch, %ch                  { byte 1 }
        jz LFPC_STRLEN_2
        incq %rax
        testl $0x00ff0000, %ecx         { byte 2 }
        jz LFPC_STRLEN_2
        incq %rax
        testl $0xff000000, %ecx         { byte 3 }
        jz LFPC_STRLEN_2
        incq %rax
        shrq $32, %rcx                  { bring bytes 4..7 down into ecx }
        testb %cl, %cl                  { byte 4 }
        jz LFPC_STRLEN_2
        incq %rax
        testb %ch, %ch                  { byte 5 }
        jz LFPC_STRLEN_2
        incq %rax
        testl $0xff0000, %ecx           { byte 6 }
        jz LFPC_STRLEN_2
        incq %rax                       { bytes 0..6 are non-zero, so byte 7 is the NUL }

  LFPC_STRLEN_2:
        subq %rdi, %rax                 { length = address of the NUL - start address }
        ret
  end;
{
  $Log$
  Revision 1.1  2003-04-30 16:36:39  florian
    + support for generic pchar routines added
    + some basic rtl stuff for x86-64 added
}