divide.inc 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2014 by the Free Pascal development team.
  4. Implementation of division helpers
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. {$if defined(CPUARM_HAS_UMULL) and defined(CPUARM_HAS_CLZ)}
  12. { ARM division helpers using umull to do a 32-bit division based on
  13. this paper: http://research.microsoft.com/pubs/70645/tr-2008-141.pdf
  14. For future optimization and testing, this file is compilable outside
  15. the system unit. }
  {$ifndef FPC_SYSTEM_HAS_DIV_DWORD}
  {$define FPC_SYSTEM_HAS_DIV_DWORD}
  (*
    fpc_div_dword - unsigned 32-bit division for ARM cores with UMULL and CLZ.

    In:    r0 = n = divisor   (called y in the pseudo-code comments below)
           r1 = z = dividend  (called x in the pseudo-code comments below)
    Out:   r0 = quotient
           r1 = remainder (the mod helpers in this file read it from r1)
    Uses:  r2, r3, ip and the flags as scratch; r4 and lr are saved on the
           stack and restored on the normal exit path.

    A divisor of zero never reaches the division code: before anything is
    pushed, control goes to .Lhandle_div_by_zero, which loads r0 = 200 and
    r1 = r11 and, inside the system unit, branches to handleerrorframe
    (presumably the run-time error code and the frame pointer - confirm
    against handleerrorframe's declaration).

    Note that "z" in the pseudo-code is the reciprocal estimate held in r2,
    not the Pascal parameter z.
  *)
  function fpc_div_dword(n,z:dword):dword;assembler;nostackframe;
  {$ifdef FPC_IS_SYSTEM}[public,alias: 'FPC_DIV_DWORD'];{$endif}
  asm
    // Handle division by zero
    cmp     r0, #0
    beq     .Lhandle_div_by_zero
    stmfd   r13!, {r4,lr}

    // Register roles
    //   r0 = y, the divisor
    //   r1 = x, the dividend
    //   r2 = k, later the reciprocal estimate z
    //   r3 = ty, later the shift count W-k-1, later my
    //   ip = t and temporaries, finally the quotient q
    //   r4 = table base, discarded low product words, finally the remainder r

    // unsigned k = clz(y);
    clz     r2, r0

    // unsigned ty = lsr( lsl(y,k), W-9 ); // prescaling
    // y shifted left by k has bit 31 set, so ty is ALWAYS in 256..511
    mov     r3, r0, lsl r2
    mov     r3, r3, lsr #23

    // unsigned t = unrt[ ty - 256 ] + 256; // table lookup
    // The -256 bias is folded into the table base address.
    adr     r4, .LLeading9BitTable - 256
    ldrb    ip, [r4, r3]
    rsb     r3, r2, #31                 // r3 = W-k-1, shift count used below
    and     r3, r3, #255
    add     ip, ip, #256

    // unsigned z = lsr( lsl(t,W-9), W-k-1 );
    mov     ip, ip, lsl #23
    mov     r2, ip, lsr r3

    // unsigned my = 0-y;
    rsb     r3, r0, #0

    // z = z + umulh(z,mul(my,z));
    // Only the high word (ip) of each umull is used; r4 takes the low word.
    mul     ip, r3, r2
    umull   r4, ip, r2, ip
    add     r2, r2, ip

    // z = z + umulh(z,mul(my,z));
    mul     ip, r3, r2
    umull   r4, ip, r2, ip
    add     r2, r2, ip

    // q estimate
    // q = umulh(x,z);
    // ip = q
    umull   r4, ip, r1, r2

    // r = x - mul(y,q);
    // r4 = r
    mul     r4, r0, ip
    sub     r4, r1, r4

    // q refinement
    // if (r >= y) { r = r - y; q = q + 1; }
    cmp     r0, r4
    subls   r4, r4, r0
    addls   ip, ip, #1

    // if (r >= y) { r = r - y; q = q + 1; }
    cmp     r0, r4
    subls   r4, r4, r0
    addls   ip, ip, #1

    // Results: quotient in r0, remainder in r1
    mov     r0, ip
    mov     r1, r4
    ldmfd   r13!, {r4,pc}

  // 256 byte entries indexed by ty-256; 256 is added to the loaded byte to form t.
  .LLeading9BitTable:
    .byte   254, 252, 250, 248, 246, 244, 242, 240, 238, 236, 234, 233, 231, 229, 227, 225
    .byte   224, 222, 220, 218, 217, 215, 213, 212, 210, 208, 207, 205, 203, 202, 200, 199
    .byte   197, 195, 194, 192, 191, 189, 188, 186, 185, 183, 182, 180, 179, 178, 176, 175
    .byte   173, 172, 170, 169, 168, 166, 165, 164, 162, 161, 160, 158, 157, 156, 154, 153
    .byte   152, 151, 149, 148, 147, 146, 144, 143, 142, 141, 139, 138, 137, 136, 135, 134
    .byte   132, 131, 130, 129, 128, 127, 126, 125, 123, 122, 121, 120, 119, 118, 117, 116
    .byte   115, 114, 113, 112, 111, 110, 109, 108, 107, 106, 105, 104, 103, 102, 101, 100
    .byte   99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 88, 87, 86, 85
    .byte   84, 83, 82, 81, 80, 80, 79, 78, 77, 76, 75, 74, 74, 73, 72, 71
    .byte   70, 70, 69, 68, 67, 66, 66, 65, 64, 63, 62, 62, 61, 60, 59, 59
    .byte   58, 57, 56, 56, 55, 54, 53, 53, 52, 51, 50, 50, 49, 48, 48, 47
    .byte   46, 46, 45, 44, 43, 43, 42, 41, 41, 40, 39, 39, 38, 37, 37, 36
    .byte   35, 35, 34, 33, 33, 32, 32, 31, 30, 30, 29, 28, 28, 27, 27, 26
    .byte   25, 25, 24, 24, 23, 22, 22, 21, 21, 20, 19, 19, 18, 18, 17, 17
    .byte   16, 15, 15, 14, 14, 13, 13, 12, 12, 11, 10, 10, 9, 9, 8, 8
    .byte   7, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0

  // Reached with nothing pushed on the stack (the branch is taken before stmfd).
  .Lhandle_div_by_zero:
    mov     r0, #200
    mov     r1, r11
  {$ifdef FPC_IS_SYSTEM}
    b       handleerrorframe
  {$endif}
  end;

  {It is a compilerproc (systemh.inc), make an alias for internal use.}
  {$ifdef FPC_IS_SYSTEM}
  function fpc_div_dword(n,z:dword):dword;external name 'FPC_DIV_DWORD';
  {$endif}
  {$endif}
  {$ifndef FPC_SYSTEM_HAS_DIV_LONGINT}
  {$define FPC_SYSTEM_HAS_DIV_LONGINT}
  (*
    fpc_div_longint - signed 32-bit division built on fpc_div_dword.

    In:    r0 = n = divisor, r1 = z = dividend
    Out:   r0 = quotient, negated when the operand signs differ
           r1 = remainder, negated when the dividend was negative

    Both operands are replaced by their two's-complement negation when
    negative, fpc_div_dword divides the results as unsigned values, and the
    signs are patched up afterwards. The sign information lives in r4 across
    the call; this relies on fpc_div_dword saving and restoring r4, which it
    does on its normal exit path.

    Layout of r4 after the eors:
      bit 31      = sign(n) xor sign(z)  -> sign of the quotient
      bits 30..0  = copies of sign(z)    -> sign of the remainder
  *)
  function fpc_div_longint(n,z:longint):longint;assembler;nostackframe;
  {$ifdef FPC_IS_SYSTEM}[public,alias: 'FPC_DIV_LONGINT'];{$endif}
  asm
    stmfd   sp!, {r4,lr}
    ands    r4, r0, #1<<31          (* r4:=r0 and $80000000; N:=sign(r0) *)
    rsbmi   r0, r0, #0              (* if signed(r0) then r0:=0-r0 *)
    eors    r4, r4, r1, ASR#32      (* r4:=r4 xor (r1 asr 32); carry:=sign(r1) *)
    rsbcs   r1, r1, #0              (* if signed(r1) then r1:=0-r1 *)
    bl      fpc_div_dword
    movs    r4, r4, LSL#1           (* carry:=bit 31 of r4; N:=bit 30 of r4 *)
    rsbcs   r0, r0, #0              (* operand signs differed: negate quotient *)
    rsbmi   r1, r1, #0              (* dividend was negative: negate remainder *)
    ldmfd   sp!, {r4,pc}
  end;

  {It is a compilerproc (systemh.inc), make an alias for internal use.}
  {$ifdef FPC_IS_SYSTEM}
  function fpc_div_longint(n,z:longint):longint;external name 'FPC_DIV_LONGINT';
  {$endif}
  {$endif}
  {$ifndef FPC_SYSTEM_HAS_MOD_DWORD}
  {$define FPC_SYSTEM_HAS_MOD_DWORD}
  (*
    fpc_mod_dword - unsigned 32-bit remainder.

    In:    r0 = n = divisor, r1 = z = dividend (passed through untouched)
    Out:   r0 = remainder

    Calls fpc_div_dword, which leaves the remainder in r1 next to the
    quotient in r0, and moves that remainder into the result register.
    ip is pushed and popped together with lr; its value is not used here
    (presumably it only keeps the stack adjustment at two words - confirm).
  *)
  function fpc_mod_dword(n,z:dword):dword;assembler;nostackframe;
  {$ifdef FPC_IS_SYSTEM}[public,alias: 'FPC_MOD_DWORD'];{$endif}
  asm
    stmfd   sp!, {ip,lr}
    bl      fpc_div_dword
    mov     r0, r1                  // result := remainder
    ldmfd   sp!, {ip,pc}
  end;

  {It is a compilerproc (systemh.inc), make an alias for internal use.}
  {$ifdef FPC_IS_SYSTEM}
  function fpc_mod_dword(n,z:dword):dword;external name 'FPC_MOD_DWORD';
  {$endif}
  {$endif}
  {$ifndef FPC_SYSTEM_HAS_MOD_LONGINT}
  {$define FPC_SYSTEM_HAS_MOD_LONGINT}
  (*
    fpc_mod_longint - signed 32-bit remainder.

    In:    r0 = n = divisor, r1 = z = dividend (passed through untouched)
    Out:   r0 = remainder

    Calls fpc_div_longint, which leaves the remainder in r1 (negated there
    when the dividend was negative), and moves it into the result register.
    ip is pushed and popped together with lr; its value is not used here
    (presumably it only keeps the stack adjustment at two words - confirm).
  *)
  function fpc_mod_longint(n,z:longint):longint;assembler;nostackframe;
  {$ifdef FPC_IS_SYSTEM}[public,alias: 'FPC_MOD_LONGINT'];{$endif}
  asm
    stmfd   sp!, {ip,lr}
    bl      fpc_div_longint
    mov     r0, r1                  // result := remainder
    ldmfd   sp!, {ip,pc}
  end;

  {It is a compilerproc (systemh.inc), make an alias for internal use.}
  {$ifdef FPC_IS_SYSTEM}
  function fpc_mod_longint(n,z:longint):longint;external name 'FPC_MOD_LONGINT';
  {$endif}
  {$endif}
  156. {$endif}