Sfoglia il codice sorgente

new division helpers for ARM by Nico Erfurth. on our ARMv5 core hardware they're 22%-36% faster than the generic ones for the most common case.

git-svn-id: trunk@28273 -
Károly Balogh 11 anni fa
parent
commit
1a4d6d79c5
3 ha cambiato i file con 193 aggiunte e 2 eliminazioni
  1. 1 0
      .gitattributes
  2. 1 2
      rtl/arm/arm.inc
  3. 191 0
      rtl/arm/divide.inc

+ 1 - 0
.gitattributes

@@ -7832,6 +7832,7 @@ rtl/android/mipsel/dllprt0.as svneol=native#text/plain
 rtl/android/mipsel/prt0.as svneol=native#text/plain
 rtl/arm/arm.inc svneol=native#text/plain
 rtl/arm/armdefines.inc svneol=native#text/plain
+rtl/arm/divide.inc svneol=native#text/plain
 rtl/arm/int64p.inc svneol=native#text/plain
 rtl/arm/makefile.cpu svneol=native#text/plain
 rtl/arm/math.inc svneol=native#text/plain

+ 1 - 2
rtl/arm/arm.inc

@@ -1115,5 +1115,4 @@ end;
 {$endif}
 
 {include hand-optimized assembler division code}
-{ $i divide.inc}
-
+{$i divide.inc}

+ 191 - 0
rtl/arm/divide.inc

@@ -0,0 +1,191 @@
+{
+    This file is part of the Free Pascal run time library.
+    Copyright (c) 2014 by the Free Pascal development team.
+
+    Implementation of division helpers
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+
+
+{$if defined(CPUARM_HAS_UMULL) and defined(CPUARM_HAS_CLZ)}
+
+{ ARM division helpers using umull to do a 32-bit division based on
+  this paper: http://research.microsoft.com/pubs/70645/tr-2008-141.pdf
+
+  For future optimization and testing, this file is compilable outside
+  the system unit. }
+
+{$ifndef FPC_SYSTEM_HAS_DIV_DWORD}
+{$define FPC_SYSTEM_HAS_DIV_DWORD}
+
+{ Unsigned 32-bit division helper.
+
+  The code treats r0 (expected to carry the first argument, n) as the
+  divisor and r1 (expected to carry the second argument, z) as the
+  dividend.  On exit r0 holds the quotient and r1 holds the remainder;
+  the mod helpers in this file read the remainder from r1 after
+  calling this routine.
+
+  A zero divisor loads error code 200 into r0 and r11 into r1 and,
+  when FPC_IS_SYSTEM is defined, branches to handleerrorframe.
+
+  The quotient is obtained by multiplying the dividend with an
+  approximate reciprocal of the divisor: a 9-bit table lookup gives
+  the first estimate, two multiply steps refine it, and up to two
+  conditional subtractions correct the final quotient and remainder. }
+function fpc_div_dword(n,z:dword):dword;assembler;nostackframe;
+{$ifdef FPC_IS_SYSTEM}[public,alias: 'FPC_DIV_DWORD'];{$endif}
+asm
+  // Handle division by zero
+  // (checked before anything is pushed, so the stack is untouched
+  // on the error path)
+  cmp   r0, #0
+  beq   .Lhandle_div_by_zero
+
+  stmfd r13!, {r4,lr}
+  // In the pseudo code comments below W = 32, y = divisor, x = dividend.
+  // r0 = divisor  (y)
+  // r1 = dividend (x)
+  // r2 = k, later z (the reciprocal estimate)
+  // r3 = ty, later W-k-1, later my
+  // ip = t, multiply temporaries, later q
+  // r4 = table base, discarded low word of umull, later r
+
+  // unsigned k = clz(y);
+  clz   r2, r0
+
+  // unsigned ty = lsr( lsl(y,k), W-9 ); // prescaling
+  // r3/ty will ALWAYS give a result between 256 and 511
+  mov   r3, r0, lsl r2
+  mov   r3, r3, lsr #23
+
+  // unsigned t = unrt[ ty - 256 ] + 256; // table lookup
+  // The table base is biased by -256 so that ty can be used as the
+  // index without subtracting 256 first.
+  adr   r4, .LLeading9BitTable - 256
+  ldrb  ip, [r4, r3]
+  // r3 = W-k-1, the shift count used for z below (ty is dead now)
+  rsb   r3, r2, #31
+  // NOTE(review): y is non-zero here, so k is 0..31 and 31-k is
+  // already 0..31; this mask leaves r3 unchanged. Presumably a
+  // harmless filler - confirm before removing.
+  and   r3, r3, #255
+  add   ip, ip, #256
+
+  // unsigned z = lsr( lsl(t,W-9), W-k-1 );
+  mov   ip, ip, lsl #23
+  mov   r2, ip, lsr r3
+
+  // unsigned my = 0-y;
+  rsb   r3, r0, #0
+
+  // z = z + umulh(z,mul(my,z));
+  // umull writes the low word to r4 (ignored) and the high word,
+  // i.e. umulh, to ip.
+  mul   ip, r3, r2
+  umull r4, ip, r2, ip
+  add   r2, r2, ip
+  // z = z + umulh(z,mul(my,z));
+  mul   ip, r3, r2
+  umull r4, ip, r2, ip
+  add   r2, r2, ip
+
+  // q estimate
+  // q = umulh(x,z);
+  // ip = q
+  umull r4, ip, r1, r2
+
+  // r = x - mul(y,q);
+  // r4 = r
+  mul r4, r0, ip
+  sub r4, r1, r4
+
+  // q refinement
+  // if (r >= y) { r = r - y; q = q + 1; }
+  // cmp computes y - r; "ls" (unsigned lower or same) is y <= r
+  cmp r0,r4
+  subls r4, r4, r0
+  addls ip, ip, #1
+
+  // if (r >= y) { r = r - y; q = q + 1; }
+  cmp r0,r4
+  subls r4, r4, r0
+  addls ip, ip, #1
+
+  // Results: r0 = quotient, r1 = remainder
+  mov r0, ip
+  mov r1, r4
+
+  // Restore r4 and return by loading the saved lr into pc
+  ldmfd r13!, {r4,pc}
+  // 256-entry byte table indexed by ty-256 (see the lookup above).
+  // It sits behind the return instruction, so it is never executed.
+.LLeading9BitTable:
+  .byte 254, 252, 250, 248, 246, 244, 242, 240, 238, 236, 234, 233, 231, 229, 227, 225
+  .byte 224, 222, 220, 218, 217, 215, 213, 212, 210, 208, 207, 205, 203, 202, 200, 199
+  .byte 197, 195, 194, 192, 191, 189, 188, 186, 185, 183, 182, 180, 179, 178, 176, 175
+  .byte 173, 172, 170, 169, 168, 166, 165, 164, 162, 161, 160, 158, 157, 156, 154, 153
+  .byte 152, 151, 149, 148, 147, 146, 144, 143, 142, 141, 139, 138, 137, 136, 135, 134
+  .byte 132, 131, 130, 129, 128, 127, 126, 125, 123, 122, 121, 120, 119, 118, 117, 116
+  .byte 115, 114, 113, 112, 111, 110, 109, 108, 107, 106, 105, 104, 103, 102, 101, 100
+  .byte 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 88, 87, 86, 85
+  .byte 84, 83, 82, 81, 80, 80, 79, 78, 77, 76, 75, 74, 74, 73, 72, 71
+  .byte 70, 70, 69, 68, 67, 66, 66, 65, 64, 63, 62, 62, 61, 60, 59, 59
+  .byte 58, 57, 56, 56, 55, 54, 53, 53, 52, 51, 50, 50, 49, 48, 48, 47
+  .byte 46, 46, 45, 44, 43, 43, 42, 41, 41, 40, 39, 39, 38, 37, 37, 36
+  .byte 35, 35, 34, 33, 33, 32, 32, 31, 30, 30, 29, 28, 28, 27, 27, 26
+  .byte 25, 25, 24, 24, 23, 22, 22, 21, 21, 20, 19, 19, 18, 18, 17, 17
+  .byte 16, 15, 15, 14, 14, 13, 13, 12, 12, 11, 10, 10, 9, 9, 8, 8
+  .byte 7, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0
+
+.Lhandle_div_by_zero:
+  // Error path: r0 = error code 200, r1 = r11
+  // (r11 is presumably the caller's frame pointer - confirm).
+  // The branch to the error handler is only assembled inside the
+  // system unit; NOTE(review): without FPC_IS_SYSTEM no branch is
+  // emitted here - confirm what a caller observes in that build.
+  mov r0, #200
+  mov r1, r11
+{$ifdef FPC_IS_SYSTEM}
+  b   handleerrorframe
+{$endif}
+end;
+
+{It is a compilerproc (systemh.inc), make an alias for internal use.}
+{$ifdef FPC_IS_SYSTEM}
+function fpc_div_dword(n,z:dword):dword;external name 'FPC_DIV_DWORD';
+{$endif}
+{$endif}
+
+{$ifndef FPC_SYSTEM_HAS_DIV_LONGINT}
+{$define FPC_SYSTEM_HAS_DIV_LONGINT}
+{ Signed 32-bit division helper built on top of fpc_div_dword.
+
+  Both operands are made non-negative, the unsigned helper is called,
+  and the signs are re-applied afterwards: the value in r0 is negated
+  when the two operands had different signs, and the value in r1 is
+  negated when the original r1 was negative.  r4 carries the sign
+  information across the call and is saved and restored here. }
+function fpc_div_longint(n,z:longint):longint;assembler;nostackframe;
+{$ifdef FPC_IS_SYSTEM}[public,alias: 'FPC_DIV_LONGINT'];{$endif}
+
+asm
+  stmfd sp!, {r4,lr}
+  ands r4, r0, #1<<31       (* r4:=r0 and $80000000; N set if r0 is negative *)
+  rsbmi r0, r0, #0           (* if r0 was negative then r0:=0-r0 *)
+  eors r4, r4, r1, ASR#32  (* r4:=r4 xor (r1 asr 32); carry:=sign bit of r1 *)
+  rsbcs r1, r1, #0           (* if r1 was negative (carry set) then r1:=0-r1 *)
+  bl fpc_div_dword           (* unsigned divide of the two magnitudes *)
+  movs r4, r4, LSL#1       (* carry:=bit 31 of r4 (signs differ); N:=bit 30 of r4 (original r1 negative) *)
+  rsbcs r0, r0, #0           (* negate r0 if the operand signs differed *)
+  rsbmi r1, r1, #0           (* negate r1 if the original r1 was negative *)
+  ldmfd sp!, {r4,pc}
+end;
+
+{It is a compilerproc (systemh.inc), make an alias for internal use.}
+{$ifdef FPC_IS_SYSTEM}
+function fpc_div_longint(n,z:longint):longint;external name 'FPC_DIV_LONGINT';
+{$endif}
+{$endif}
+
+{$ifndef FPC_SYSTEM_HAS_MOD_DWORD}
+{$define FPC_SYSTEM_HAS_MOD_DWORD}
+{ Unsigned 32-bit modulo helper.
+
+  Calls fpc_div_dword with the incoming registers unchanged and
+  returns the value that routine leaves in r1 (its remainder). }
+function fpc_mod_dword(n,z:dword):dword;assembler;nostackframe;
+{$ifdef FPC_IS_SYSTEM}[public,alias: 'FPC_MOD_DWORD'];{$endif}
+
+asm
+  // lr must survive the call below.
+  // NOTE(review): ip is presumably pushed only to keep the stack
+  // adjustment at two words - confirm.
+  stmfd sp!, {ip,lr}
+  bl fpc_div_dword
+  // fpc_div_dword leaves the remainder in r1; make it the result
+  mov r0, r1
+  ldmfd sp!, {ip,pc}
+end;
+
+{It is a compilerproc (systemh.inc), make an alias for internal use.}
+{$ifdef FPC_IS_SYSTEM}
+function fpc_mod_dword(n,z:dword):dword;external name 'FPC_MOD_DWORD';
+{$endif}
+{$endif}
+
+{$ifndef FPC_SYSTEM_HAS_MOD_LONGINT}
+{$define FPC_SYSTEM_HAS_MOD_LONGINT}
+{ Signed 32-bit modulo helper.
+
+  Calls fpc_div_longint with the incoming registers unchanged and
+  returns the value that routine leaves in r1, i.e. the remainder
+  after fpc_div_longint has re-applied the sign of the original r1. }
+function fpc_mod_longint(n,z:longint):longint;assembler;nostackframe;
+{$ifdef FPC_IS_SYSTEM}[public,alias: 'FPC_MOD_LONGINT'];{$endif}
+
+asm
+  // lr must survive the call below.
+  // NOTE(review): ip is presumably pushed only to keep the stack
+  // adjustment at two words - confirm.
+  stmfd sp!, {ip,lr}
+  bl fpc_div_longint
+  // fpc_div_longint leaves the signed remainder in r1; make it the result
+  mov r0, r1
+  ldmfd sp!, {ip,pc}
+end;
+
+{It is a compilerproc (systemh.inc), make an alias for internal use.}
+{$ifdef FPC_IS_SYSTEM}
+function fpc_mod_longint(n,z:longint):longint;external name 'FPC_MOD_LONGINT';
+{$endif}
+{$endif}
+
+{$endif}