12 سال پیش · 4a107dcfa6
--- a/rtl/i8086/int32p.inc
+++ b/rtl/i8086/int32p.inc
@@ -72,3 +72,198 @@ begin
 
															     HandleErrorAddrFrameInd(215,get_pc_addr,get_frame);
														
 
															 end;
														
 
															+
														
 
															+{$define FPC_SYSTEM_HAS_DIV_DWORD}
														
 
															+function fpc_div_dword( n, z: dword ): dword; [public, alias:'FPC_DIV_DWORD']; compilerproc;
														
 
															+begin
														
 
															+{ routine contributed by Max Nazhalov }
														
 
															+  result := 0;
														
 
															+  if n=0 then
														
 
															+    HandleErrorAddrFrameInd(200,get_pc_addr,get_frame);
														
 
															+  asm
														
 
															+      mov    ax,word [z]
														
 
															+      mov    dx,word [z+2]
														
 
															+      mov    bx,word [n]
														
 
															+      mov    cx,word [n+2]
														
 
															+      // check for underflow: z<n
														
 
															+      mov    si,dx
														
 
															+      cmp    ax,bx
														
 
															+      sbb    si,cx
														
 
															+      jc     @@3
														
 
															+      // select one of 3 trivial cases
														
 
															+      test   cx,cx
														
 
															+      jnz    @@1
														
 
															+      cmp    dx,bx
														
 
															+      jnc    @@0
														
 
															+      // (i) single division: n<=0xFFFF, z<=(n<<16)-1
														
 
															+      div    bx
														
 
															+      mov    word [result],ax
														
 
															+      jmp    @@3
														
 
															+@@0:  // (ii) two divisions: n<=0xFFFF, z>(n<<16)-1
														
 
															+      //  q1 := [0:z1] div n; r := [0:z1] mod n;
														
 
															+      //  q0 := [r:z0] div n;
														
 
															+      xchg   ax,cx
														
 
															+      xchg   ax,dx
														
 
															+    { dx=0, ax=z1, cx=z0 }
														
 
															+      div    bx
														
 
															+      xchg   ax,cx
														
 
															+    { dx=r, ax=z0, cx=q1 }
														
 
															+      div    bx
														
 
															+      mov    word [result],ax
														
 
															+      mov    word [result+2],cx
														
 
															+      jmp    @@3
														
 
															+@@1:  // (iii) long divisor: n>=0x10000 (hence q<=0xFFFF)
														
 
															+      // Special case of the generic "schoolbook" division [see e.g. Knuth]:
														
 
															+      //  1. normalize divisor: [n1:n0] := n<<m, so that 0x8000<=n1<=0xFFFF
														
 
															+      //     n>=0x10000 -> m<=15
														
 
															+      //  2. adjust divident accordingly: [z2:z1:z0] := z<<m
														
 
															+      //     m<=15 -> z2<=0x7FFF
														
 
															+      // implementation: instead do >> dropping n0 and z0
														
 
															+      mov    si,bx // save n0
														
 
															+      mov    di,cx // save n1
														
 
															+      test   ch,ch
														
 
															+      jz     @@2
														
 
															+      mov    bl,bh
														
 
															+      mov    bh,cl
														
 
															+      mov    cl,ch
														
 
															+      mov    al,ah
														
 
															+      mov    ah,dl
														
 
															+      mov    dl,dh
														
 
															+      xor    dh,dh
														
 
															+@@2:  // repeat >> 1..8 times resulting in [dx:ax]=[z2:z1] and bx=n1
														
 
															+      shr    cl,1
														
 
															+      rcr    bx,1
														
 
															+      shr    dx,1
														
 
															+      rcr    ax,1
														
 
															+      test   cl,cl
														
 
															+      jnz    @@2
														
 
															+      //  3. estimate quotient: q_hat := [z2:z1]/n1
														
 
															+      //     Division never overflows since z2<=0x7FFF and n1>0x7FFF
														
 
															+      div    bx
														
 
															+      //  4. multiply & subtract calculating remainder:
														
 
															+      //     r := z-n*q_hat (z and n are original)
														
 
															+      //  5. adjust quotient: while (r<0) do { q_hat-=1; r+=n };
														
 
															+      //     theoretically, 0..2 iterations are required [see e.g. Knuth];
														
 
															+      //     in practice, with such initial data, at most one iteration
														
 
															+      //     is needed (no disproof has been found yet; and if it will
														
 
															+      //     ever be found -- it also should raise doubts about the i386
														
 
															+      //     fpc_div_qword helper again; see FPC mantis #23963)
														
 
															+      mov    cx,ax // save q_hat
														
 
															+      mul    si
														
 
															+      mov    bx,ax
														
 
															+      mov    si,dx
														
 
															+      mov    ax,cx
														
 
															+      mul    di
														
 
															+      xor    di,di
														
 
															+      add    ax,si
														
 
															+      adc    dx,di // [dx:ax:bx] := n*q_hat; di=0
														
 
															+      mov    si,word [z]
														
 
															+      sub    si,bx
														
 
															+      mov    si,word [z+2]
														
 
															+      sbb    si,ax
														
 
															+      sbb    di,dx
														
 
															+      sbb    cx,0
														
 
															+      //  6. done: q := [0:cx]
														
 
															+      mov    word [result],cx
														
 
															+@@3:
														
 
															+  end;
														
 
															+end;
														
 
															+
														
 
															+
														
 
															+{$define FPC_SYSTEM_HAS_MOD_DWORD}
														
 
															+function fpc_mod_dword( n, z: dword ): dword; [public, alias:'FPC_MOD_DWORD']; compilerproc;
														
 
															+begin
														
 
															+{ routine contributed by Max Nazhalov }
														
 
															+  result := z;
														
 
															+  if n=0 then
														
 
															+    HandleErrorAddrFrameInd(200,get_pc_addr,get_frame);
														
 
															+  asm
														
 
															+      mov    ax,word [z]
														
 
															+      mov    dx,word [z+2]
														
 
															+      mov    bx,word [n]
														
 
															+      mov    cx,word [n+2]
														
 
															+      // check for underflow: z<n
														
 
															+      mov    si,dx
														
 
															+      cmp    ax,bx
														
 
															+      sbb    si,cx
														
 
															+      jc     @@4
														
 
															+      // select one of 3 trivial cases
														
 
															+      test   cx,cx
														
 
															+      jnz    @@1
														
 
															+      cmp    dx,bx
														
 
															+      jnc    @@0
														
 
															+      // (i) single division: n<=0xFFFF, z<=(n<<16)-1
														
 
															+      div    bx
														
 
															+      jmp    @@3 // r=cx:dx (cx=0)
														
 
															+@@0:  // (ii) two divisions: n<=0xFFFF, z>(n<<16)-1
														
 
															+      //  q1 := [0:z1] div n; r := [0:z1] mod n;
														
 
															+      //  q0 := [r:z0] div n; r := [r:z0] mod n;
														
 
															+      xchg   ax,cx
														
 
															+      xchg   ax,dx
														
 
															+    { dx=0, ax=z1, cx=z0 }
														
 
															+      div    bx
														
 
															+      mov    ax,cx
														
 
															+      xor    cx,cx
														
 
															+    { dx=r, ax=z0, cx=0 }
														
 
															+      div    bx
														
 
															+      jmp    @@3 // r=cx:dx (cx=0)
														
 
															+@@1:  // (iii) long divisor: n>=0x10000 (hence q<=0xFFFF)
														
 
															+      // Special case of the generic "schoolbook" division [see e.g. Knuth]:
														
 
															+      //  1. normalize divisor: [n1:n0] := n<<m, so that 0x8000<=n1<=0xFFFF
														
 
															+      //     n>=0x10000 -> m<=15
														
 
															+      //  2. adjust divident accordingly: [z2:z1:z0] := z<<m
														
 
															+      //     m<=15 -> z2<=0x7FFF
														
 
															+      // implementation: instead do >> dropping n0 and z0
														
 
															+      mov    si,bx // save n0
														
 
															+      mov    di,cx // save n1
														
 
															+      test   ch,ch
														
 
															+      jz     @@2
														
 
															+      mov    bl,bh
														
 
															+      mov    bh,cl
														
 
															+      mov    cl,ch
														
 
															+      mov    al,ah
														
 
															+      mov    ah,dl
														
 
															+      mov    dl,dh
														
 
															+      xor    dh,dh
														
 
															+@@2:  // repeat >> 1..8 times resulting in [dx:ax]=[z2:z1] and bx=n1
														
 
															+      shr    cl,1
														
 
															+      rcr    bx,1
														
 
															+      shr    dx,1
														
 
															+      rcr    ax,1
														
 
															+      test   cl,cl
														
 
															+      jnz    @@2
														
 
															+      //  3. estimate quotient: q_hat := [z2:z1]/n1
														
 
															+      //     Division never overflows since z2<=0x7FFF and n1>0x7FFF
														
 
															+      div    bx
														
 
															+      //  4. multiply & subtract calculating remainder:
														
 
															+      //     r := z-n*q_hat (z and n are original)
														
 
															+      //  5. adjust quotient: while (r<0) do { q_hat-=1; r+=n };
														
 
															+      //     theoretically, 0..2 iterations are required [see e.g. Knuth];
														
 
															+      //     in practice, with such initial data, at most one iteration
														
 
															+      //     is needed (no disproof has been found yet; and if it will
														
 
															+      //     ever be found -- it also should raise doubts about the i386
														
 
															+      //     fpc_div_qword helper again; see FPC mantis #23963)
														
 
															+      mov    cx,ax // save q_hat
														
 
															+      mul    si
														
 
															+      mov    bx,ax
														
 
															+      mov    si,dx
														
 
															+      mov    ax,cx
														
 
															+      mul    di
														
 
															+      xor    di,di
														
 
															+      add    ax,si
														
 
															+      adc    dx,di // [dx:ax:bx] := n*q_hat; di=0
														
 
															+      mov    si,word [z]
														
 
															+      mov    cx,word [z+2]
														
 
															+      sub    si,bx
														
 
															+      sbb    cx,ax
														
 
															+      sbb    di,dx
														
 
															+      mov    dx,si
														
 
															+      jnc    @@3
														
 
															+      add    dx,word [n]
														
 
															+      adc    cx,word [n+2]
														
 
															+@@3:  // done: r=cx:dx
														
 
															+      mov    word [result],dx
														
 
															+      mov    word [result+2],cx
														
 
															+@@4:
														
 
															+  end;
														
 
															+end;