12 yıl önce · 4a107dcfa6
--- a/rtl/i8086/int32p.inc
+++ b/rtl/i8086/int32p.inc
@@ -72,3 +72,198 @@ begin
 
				     HandleErrorAddrFrameInd(215,get_pc_addr,get_frame);
			
 
				 end;
			
 
				 
			
 
				+
			
 
				+{$define FPC_SYSTEM_HAS_DIV_DWORD}
			
 
				+function fpc_div_dword( n, z: dword ): dword; [public, alias:'FPC_DIV_DWORD']; compilerproc;
				+begin
				+{ routine contributed by Max Nazhalov
				+
				+  Unsigned 32-bit division built from 16-bit instructions.
				+    n      - divisor
				+    z      - dividend
				+    result - quotient z div n
				+
				+  n=0 is reported through HandleErrorAddrFrameInd with code 200
				+  (presumably the division-by-zero run-time error -- confirm).
				+  NOTE(review): nothing skips the asm block after that call, so the
				+  handler is presumably expected not to return; if it did return, the
				+  n=0 path would reach a DIV by zero in case (ii).
				+
				+  The asm block overwrites ax, bx, cx, dx, si and di.
				+  Word notation used below: z = [z1:z0], n = [n1:n0] (high:low). }
				+  result := 0; // default quotient: kept for z<n; also supplies the zero high word in cases (i) and (iii)
				+  if n=0 then
				+    HandleErrorAddrFrameInd(200,get_pc_addr,get_frame);
				+  asm
				+      mov    ax,word [z]    // dx:ax := z
				+      mov    dx,word [z+2]
				+      mov    bx,word [n]    // cx:bx := n
				+      mov    cx,word [n+2]
				+      // check for underflow: z<n
				+      mov    si,dx          // si is scratch, only the final borrow is used
				+      cmp    ax,bx          // borrow of z0-n0 ...
				+      sbb    si,cx          // ... carried into z1-n1: CF=1 iff z<n
				+      jc     @@3            // z<n: quotient is the preset 0
				+      // select one of 3 trivial cases
				+      test   cx,cx
				+      jnz    @@1            // n1<>0: long divisor, case (iii)
				+      cmp    dx,bx
				+      jnc    @@0            // z1>=n0: quotient needs more than 16 bits, case (ii)
				+      // (i) single division: n<=0xFFFF, z<=(n<<16)-1
				+      div    bx             // ax := [z1:z0] div n0
				+      mov    word [result],ax
				+      jmp    @@3
				+@@0:  // (ii) two divisions: n<=0xFFFF, z>(n<<16)-1
				+      //  q1 := [0:z1] div n; r := [0:z1] mod n;
				+      //  q0 := [r:z0] div n;
				+      xchg   ax,cx          // cx=0 on this path, so ax := 0 and cx := z0
				+      xchg   ax,dx          // ax := z1, dx := 0
				+    { dx=0, ax=z1, cx=z0 }
				+      div    bx             // ax := q1, dx := r
				+      xchg   ax,cx          // ax := z0, cx := q1
				+    { dx=r, ax=z0, cx=q1 }
				+      div    bx             // ax := q0
				+      mov    word [result],ax
				+      mov    word [result+2],cx
				+      jmp    @@3
				+@@1:  // (iii) long divisor: n>=0x10000 (hence q<=0xFFFF)
				+      // Special case of the generic "schoolbook" division [see e.g. Knuth]:
				+      //  1. normalize divisor: [n1:n0] := n<<m, so that 0x8000<=n1<=0xFFFF
				+      //     n>=0x10000 -> m<=15
				+      //  2. adjust dividend accordingly: [z2:z1:z0] := z<<m
				+      //     m<=15 -> z2<=0x7FFF
				+      // implementation: instead do >> dropping n0 and z0
				+      mov    si,bx // save n0
				+      mov    di,cx // save n1
				+      test   ch,ch
				+      jz     @@2            // top byte of n is zero: no byte-wise pre-shift
				+      mov    bl,bh          // n >>= 8: bx := bits 8..23 of n ...
				+      mov    bh,cl
				+      mov    cl,ch          // ... cl := bits 24..31 (ch is stale from here on, only cl is tested)
				+      mov    al,ah          // z >>= 8 in dx:ax
				+      mov    ah,dl
				+      mov    dl,dh
				+      xor    dh,dh
				+@@2:  // repeat >> 1..8 times resulting in [dx:ax]=[z2:z1] and bx=n1
				+      shr    cl,1           // [cl:bx] >>= 1
				+      rcr    bx,1
				+      shr    dx,1           // [dx:ax] >>= 1
				+      rcr    ax,1
				+      test   cl,cl
				+      jnz    @@2            // loop until the shifted divisor fits entirely in bx
				+      //  3. estimate quotient: q_hat := [z2:z1]/n1
				+      //     Division never overflows since z2<=0x7FFF and n1>0x7FFF
				+      div    bx             // ax := q_hat
				+      //  4. multiply & subtract calculating remainder:
				+      //     r := z-n*q_hat (z and n are original)
				+      //  5. adjust quotient: while (r<0) do { q_hat-=1; r+=n };
				+      //     theoretically, 0..2 iterations are required [see e.g. Knuth];
				+      //     in practice, with such initial data, at most one iteration
				+      //     is needed (no disproof has been found yet; and if it will
				+      //     ever be found -- it also should raise doubts about the i386
				+      //     fpc_div_qword helper again; see FPC mantis #23963)
				+      mov    cx,ax // save q_hat
				+      mul    si             // dx:ax := n0*q_hat (si = saved n0)
				+      mov    bx,ax          // bx := low word of n*q_hat
				+      mov    si,dx          // si := high word of n0*q_hat, added into the middle word below
				+      mov    ax,cx
				+      mul    di             // dx:ax := n1*q_hat (di = saved n1)
				+      xor    di,di
				+      add    ax,si
				+      adc    dx,di // [dx:ax:bx] := n*q_hat; di=0
				+      mov    si,word [z]
				+      sub    si,bx          // difference words are discarded, only the borrow chain matters
				+      mov    si,word [z+2]  // mov does not modify CF, so the borrow survives
				+      sbb    si,ax
				+      sbb    di,dx          // CF=1 iff n*q_hat > z, i.e. q_hat is too large
				+      sbb    cx,0           // q := q_hat - CF (the single correction step)
				+      //  6. done: q := [0:cx]
				+      mov    word [result],cx
				+@@3:
				+  end;
				+end;
			
 
				+
			
 
				+
			
 
				+{$define FPC_SYSTEM_HAS_MOD_DWORD}
			
 
				+function fpc_mod_dword( n, z: dword ): dword; [public, alias:'FPC_MOD_DWORD']; compilerproc;
				+begin
				+{ routine contributed by Max Nazhalov
				+
				+  Unsigned 32-bit remainder built from 16-bit instructions.
				+    n      - divisor
				+    z      - dividend
				+    result - remainder z mod n
				+
				+  n=0 is reported through HandleErrorAddrFrameInd with code 200
				+  (presumably the division-by-zero run-time error -- confirm).
				+  NOTE(review): nothing skips the asm block after that call, so the
				+  handler is presumably expected not to return; if it did return, the
				+  n=0 path would reach a DIV by zero in case (ii).
				+
				+  The asm block overwrites ax, bx, cx, dx, si and di.
				+  Word notation used below: z = [z1:z0], n = [n1:n0] (high:low). }
				+  result := z; // default remainder: kept unchanged when z<n (jump to @@4)
				+  if n=0 then
				+    HandleErrorAddrFrameInd(200,get_pc_addr,get_frame);
				+  asm
				+      mov    ax,word [z]    // dx:ax := z
				+      mov    dx,word [z+2]
				+      mov    bx,word [n]    // cx:bx := n
				+      mov    cx,word [n+2]
				+      // check for underflow: z<n
				+      mov    si,dx          // si is scratch, only the final borrow is used
				+      cmp    ax,bx          // borrow of z0-n0 ...
				+      sbb    si,cx          // ... carried into z1-n1: CF=1 iff z<n
				+      jc     @@4            // z<n: remainder is the preset z, nothing to store
				+      // select one of 3 trivial cases
				+      test   cx,cx
				+      jnz    @@1            // n1<>0: long divisor, case (iii)
				+      cmp    dx,bx
				+      jnc    @@0            // z1>=n0: quotient needs more than 16 bits, case (ii)
				+      // (i) single division: n<=0xFFFF, z<=(n<<16)-1
				+      div    bx             // dx := [z1:z0] mod n0
				+      jmp    @@3 // r=cx:dx (cx=0)
				+@@0:  // (ii) two divisions: n<=0xFFFF, z>(n<<16)-1
				+      //  q1 := [0:z1] div n; r := [0:z1] mod n;
				+      //  q0 := [r:z0] div n; r := [r:z0] mod n;
				+      xchg   ax,cx          // cx=0 on this path, so ax := 0 and cx := z0
				+      xchg   ax,dx          // ax := z1, dx := 0
				+    { dx=0, ax=z1, cx=z0 }
				+      div    bx             // dx := r (the quotient q1 in ax is not needed)
				+      mov    ax,cx          // ax := z0
				+      xor    cx,cx          // high word of the remainder is 0
				+    { dx=r, ax=z0, cx=0 }
				+      div    bx             // dx := [r:z0] mod n0
				+      jmp    @@3 // r=cx:dx (cx=0)
				+@@1:  // (iii) long divisor: n>=0x10000 (hence q<=0xFFFF)
				+      // Special case of the generic "schoolbook" division [see e.g. Knuth]:
				+      //  1. normalize divisor: [n1:n0] := n<<m, so that 0x8000<=n1<=0xFFFF
				+      //     n>=0x10000 -> m<=15
				+      //  2. adjust dividend accordingly: [z2:z1:z0] := z<<m
				+      //     m<=15 -> z2<=0x7FFF
				+      // implementation: instead do >> dropping n0 and z0
				+      mov    si,bx // save n0
				+      mov    di,cx // save n1
				+      test   ch,ch
				+      jz     @@2            // top byte of n is zero: no byte-wise pre-shift
				+      mov    bl,bh          // n >>= 8: bx := bits 8..23 of n ...
				+      mov    bh,cl
				+      mov    cl,ch          // ... cl := bits 24..31 (ch is stale from here on, only cl is tested)
				+      mov    al,ah          // z >>= 8 in dx:ax
				+      mov    ah,dl
				+      mov    dl,dh
				+      xor    dh,dh
				+@@2:  // repeat >> 1..8 times resulting in [dx:ax]=[z2:z1] and bx=n1
				+      shr    cl,1           // [cl:bx] >>= 1
				+      rcr    bx,1
				+      shr    dx,1           // [dx:ax] >>= 1
				+      rcr    ax,1
				+      test   cl,cl
				+      jnz    @@2            // loop until the shifted divisor fits entirely in bx
				+      //  3. estimate quotient: q_hat := [z2:z1]/n1
				+      //     Division never overflows since z2<=0x7FFF and n1>0x7FFF
				+      div    bx             // ax := q_hat
				+      //  4. multiply & subtract calculating remainder:
				+      //     r := z-n*q_hat (z and n are original)
				+      //  5. adjust quotient: while (r<0) do { q_hat-=1; r+=n };
				+      //     theoretically, 0..2 iterations are required [see e.g. Knuth];
				+      //     in practice, with such initial data, at most one iteration
				+      //     is needed (no disproof has been found yet; and if it will
				+      //     ever be found -- it also should raise doubts about the i386
				+      //     fpc_div_qword helper again; see FPC mantis #23963)
				+      //     Only r is needed in this routine, so the code below performs
				+      //     just the single "r+=n" step when the subtraction borrows.
				+      mov    cx,ax // save q_hat
				+      mul    si             // dx:ax := n0*q_hat (si = saved n0)
				+      mov    bx,ax          // bx := low word of n*q_hat
				+      mov    si,dx          // si := high word of n0*q_hat, added into the middle word below
				+      mov    ax,cx
				+      mul    di             // dx:ax := n1*q_hat (di = saved n1)
				+      xor    di,di
				+      add    ax,si
				+      adc    dx,di // [dx:ax:bx] := n*q_hat; di=0
				+      mov    si,word [z]
				+      mov    cx,word [z+2]
				+      sub    si,bx          // cx:si := low 32 bits of z - n*q_hat
				+      sbb    cx,ax
				+      sbb    di,dx          // CF=1 iff n*q_hat > z, i.e. q_hat was too large
				+      mov    dx,si          // mov does not modify CF, so jnc still sees the borrow
				+      jnc    @@3
				+      add    dx,word [n]    // r was negative: r := r + n (carry out of bit 31 is dropped)
				+      adc    cx,word [n+2]
				+@@3:  // done: r=cx:dx
				+      mov    word [result],dx
				+      mov    word [result+2],cx
				+@@4:
				+  end;
				+end;