瀏覽代碼

+ patch by Jeppe Johansen to make use of the sdiv/udiv instructions on armv7m, resolves #20022
* explicitly make symbol addressing PC relative

git-svn-id: trunk@19221 -

florian 14 年之前
父節點
當前提交
5fa184c952
共有 3 個文件被更改,包括 107 次插入134 次删除
  1. 5 6
      compiler/arm/cgcpu.pas
  2. 83 25
      compiler/arm/narmmat.pas
  3. 19 103
      rtl/arm/divide.inc

+ 5 - 6
compiler/arm/cgcpu.pas

@@ -283,6 +283,7 @@ unit cgcpu;
                current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
 
                hr.symbol:=l;
+               hr.base:=NR_PC;
                list.concat(taicpu.op_reg_ref(A_LDR,reg,hr));
             end;
        end;
@@ -640,8 +641,7 @@ unit cgcpu;
 
         if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
           case op of
-            OP_NEG,OP_NOT,
-            OP_DIV,OP_IDIV:
+            OP_NEG,OP_NOT:
               internalerror(200308281);
             OP_SHL:
               begin
@@ -742,11 +742,11 @@ unit cgcpu;
         else
           begin
             { there could be added some more sophisticated optimizations }
-            if (op in [OP_MUL,OP_IMUL]) and (a=1) then
+            if (op in [OP_MUL,OP_IMUL,OP_DIV,OP_IDIV]) and (a=1) then
               a_load_reg_reg(list,size,size,src,dst)
             else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
               a_load_const_reg(list,size,0,dst)
-            else if (op in [OP_IMUL]) and (a=-1) then
+            else if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
               a_op_reg_reg(list,OP_NEG,size,src,dst)
             { we do this here instead in the peephole optimizer because
               it saves us a register }
@@ -3192,8 +3192,7 @@ unit cgcpu;
       begin
         ovloc.loc:=LOC_VOID;
         case op of
-           OP_NEG,OP_NOT,
-           OP_DIV,OP_IDIV:
+           OP_NEG,OP_NOT:
               internalerror(200308281);
            OP_ROL:
               begin

+ 83 - 25
compiler/arm/narmmat.pas

@@ -54,7 +54,8 @@ implementation
       pass_2,procinfo,
       ncon,
       cpubase,cpuinfo,
-      ncgutil,cgcpu;
+      ncgutil,cgcpu,
+      nadd,pass_1,symdef;
 
 {*****************************************************************************
                              TARMMODDIVNODE
@@ -72,6 +73,26 @@ implementation
           ) and
           not(is_64bitint(resultdef)) then
           result:=nil
+        else if (current_settings.cputype in [cpu_armv7m]) and
+          (nodetype=divn) and
+          not(is_64bitint(resultdef)) then
+          result:=nil
+        else if (current_settings.cputype in [cpu_armv7m]) and
+          (nodetype=modn) and
+          not(is_64bitint(resultdef)) then
+          begin
+            if (right.nodetype=ordconstn) and
+              ispowerof2(tordconstnode(right).value,power) and
+              (tordconstnode(right).value<=256) and
+              (tordconstnode(right).value>0) then
+              result:=caddnode.create(andn,left,cordconstnode.create(tordconstnode(right).value-1,sinttype,false))
+            else
+              begin
+                result:=caddnode.create(subn,left,caddnode.create(muln,right.getcopy, cmoddivnode.Create(divn,left.getcopy,right.getcopy)));
+                right:=nil;
+              end;
+            left:=nil;
+          end
         else
           result:=inherited first_moddivint;
       end;
@@ -167,38 +188,75 @@ implementation
       begin
         secondpass(left);
         secondpass(right);
-        location_copy(location,left.location);
-
-        { put numerator in register }
-        size:=def_cgsize(left.resultdef);
-        location_force_reg(current_asmdata.CurrAsmList,left.location,
-          size,true);
-        location_copy(location,left.location);
-        numerator:=location.register;
-        resultreg:=location.register;
-        if location.loc=LOC_CREGISTER then
+
+        if (current_settings.cputype in [cpu_armv7m]) and
+           (nodetype=divn) and
+           not(is_64bitint(resultdef)) then
           begin
+            size:=def_cgsize(left.resultdef);
+            location_force_reg(current_asmdata.CurrAsmList,left.location,size,true);
+
+            location_copy(location,left.location);
             location.loc := LOC_REGISTER;
             location.register := cg.getintregister(current_asmdata.CurrAsmList,size);
             resultreg:=location.register;
-          end
-        else if (nodetype=modn) or (right.nodetype=ordconstn) then
-          begin
-            // for a modulus op, and for const nodes we need the result register
-            // to be an extra register
-            resultreg:=cg.getintregister(current_asmdata.CurrAsmList,size);
-          end;
 
-        if right.nodetype=ordconstn then
-          begin
-            if nodetype=divn then
-              genOrdConstNodeDiv
+            if (right.nodetype=ordconstn) and
+               ((tordconstnode(right).value=1) or
+                (tordconstnode(right).value=int64(-1)) or
+                (tordconstnode(right).value=0) or
+                ispowerof2(tordconstnode(right).value,power)) then
+              begin
+                numerator:=left.location.register;
+
+                genOrdConstNodeDiv;
+              end
             else
-//              genOrdConstNodeMod;
+              begin
+                location_force_reg(current_asmdata.CurrAsmList,right.location,size,true);
+
+                if is_signed(left.resultdef) or
+                   is_signed(right.resultdef) then
+                  cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList,OP_IDIV,OS_INT,right.location.register,left.location.register,location.register)
+                else
+                  cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList,OP_DIV,OS_INT,right.location.register,left.location.register,location.register);
+              end;
+          end
+        else
+          begin
+            location_copy(location,left.location);
+
+            { put numerator in register }
+            size:=def_cgsize(left.resultdef);
+            location_force_reg(current_asmdata.CurrAsmList,left.location,
+              size,true);
+            location_copy(location,left.location);
+            numerator:=location.register;
+            resultreg:=location.register;
+            if location.loc=LOC_CREGISTER then
+              begin
+                location.loc := LOC_REGISTER;
+                location.register := cg.getintregister(current_asmdata.CurrAsmList,size);
+                resultreg:=location.register;
+              end
+            else if (nodetype=modn) or (right.nodetype=ordconstn) then
+              begin
+                // for a modulus op, and for const nodes we need the result register
+                // to be an extra register
+                resultreg:=cg.getintregister(current_asmdata.CurrAsmList,size);
+              end;
+
+            if right.nodetype=ordconstn then
+              begin
+                if nodetype=divn then
+                  genOrdConstNodeDiv
+                else
+    //              genOrdConstNodeMod;
+              end;
+
+            location.register:=resultreg;
           end;
 
-        location.register:=resultreg;
-
         { unsigned division/module can only overflow in case of division by zero }
         { (but checking this overflow flag is more convoluted than performing a  }
         {  simple comparison with 0)                                             }

+ 19 - 103
rtl/arm/divide.inc

@@ -41,88 +41,9 @@
 function fpc_div_dword(n,z:dword):dword;[public,alias: 'FPC_DIV_DWORD'];assembler;nostackframe;
 
 asm
-  {$if defined(CPUCORTEXM3) or defined(CPUARMV7M)}
-  {$ifdef CPUCORTEXM3}
-  udiv r0, r1, r0
-  {$else}
-  mov r3, #0
-  rsbs r2, r0, r1, LSR#3
-  bcc .Ldiv_3bits
-  rsbs r2, r0, r1, LSR#8
-  bcc .Ldiv_8bits
-  mov r0, r0, LSL#8
-  orr r3, r3, #0xFF000000
-  rsbs r2, r0, r1, LSR#4
-  bcc .Ldiv_4bits
-  rsbs r2, r0, r1, LSR#8
-  bcc .Ldiv_8bits
-  mov r0, r0, LSL#8
-  orr r3, r3, #0x00FF0000
-  rsbs r2, r0, r1, LSR#8
-  itt cs
-  movcs r0, r0, LSL#8
-  orrcs r3, r3, #0x0000FF00
-  rsbs r2, r0, r1, LSR#4
-  bcc .Ldiv_4bits
-  rsbs r2, r0, #0
-  bcs .Ldiv_by_0
-.Ldiv_loop:
-  it cs
-  movcs r0, r0, LSR#8
-.Ldiv_8bits:
-  rsbs r2, r0, r1, LSR#7
-  it cs
-  subcs r1, r1, r0, LSL#7
-  adc r3, r3, r3
-  rsbs r2, r0, r1, LSR#6
-  it cs
-  subcs r1, r1, r0, LSL#6
-  adc r3, r3, r3
-  rsbs r2, r0, r1, LSR#5
-  it cs
-  subcs r1, r1, r0, LSL#5
-  adc r3, r3, r3
-  rsbs r2, r0, r1, LSR#4
-  it cs
-  subcs r1, r1, r0, LSL#4
-  adc r3, r3, r3
-.Ldiv_4bits:
-  rsbs r2, r0, r1, LSR#3
-  it cs
-  subcs r1, r1, r0, LSL#3
-  adc r3, r3, r3
-.Ldiv_3bits:
-  rsbs r2, r0, r1, LSR#2
-  it cs
-  subcs r1, r1, r0, LSL#2
-  adc r3, r3, r3
-  rsbs r2, r0, r1, LSR#1
-  it cs
-  subcs r1, r1, r0, LSL#1
-  adc r3, r3, r3
-  rsbs r2, r0, r1
-  it cs
-  subcs r1, r1, r0
-  adcs r3, r3, r3
-.Ldiv_next:
-  bcs .Ldiv_loop
-  mov r0, r3
-{$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
-  mov pc, lr
+{$if defined(CPUARMV7M)}
+  udiv r0, r0, r1
 {$else}
-  bx  lr
-{$endif}
-.Ldiv_by_0:
-  mov r0, #200
-  mov r1, r11
-  bl handleerrorframe
-{$if defined(cpuarmv3) or defined(cpuarmv4) or defined(cpuarmv5)}
-  mov pc, lr
-{$else}
-  bx  lr
-{$endif}
-  {$endif}
-  {$else}
   mov r3, #0
   rsbs r2, r0, r1, LSR#3
   bcc .Ldiv_3bits
@@ -189,7 +110,7 @@ asm
 {$else}
   bx  lr
 {$endif}
-  {$endif}
+{$endif}
 end;
 
 {It is a compilerproc (systemh.inc), make an alias for internal use.}
@@ -201,26 +122,9 @@ function fpc_div_dword(n,z:dword):dword;external name 'FPC_DIV_DWORD';
 function fpc_div_longint(n,z:longint):longint;[public,alias: 'FPC_DIV_LONGINT'];assembler;nostackframe;
 
 asm
-  {$if defined(CPUCORTEXM3) or defined(CPUARMV7M)}
-  {$ifdef CPUCORTEXM3}
-  sdiv r0, r1, r0
-  {$else}
-  stmfd sp!, {lr}
-  ands r12, r0, #1<<31       (* r12:=r0 and $80000000 *)
-  it mi
-  rsbmi r0, r0, #0           (* if signed(r0) then r0:=0-r0 *)
-  eors r12, r12, r1, ASR#32  (* r12:=r12 xor (r1 asr 32) *)
-  it cs
-  rsbcs r1, r1, #0           (* if signed(r12) then r1:=0-r1 *)
-  bl fpc_div_dword
-  movs r12, r12, LSL#1       (* carry:=sign(r12) *)
-  it cs
-  rsbcs r0, r0, #0
-  it mi
-  rsbmi r1, r1, #0
-  ldmfd sp!, {pc}
-  {$endif}
-  {$else}
+{$if defined(CPUARMV7M)}
+  sdiv r0, r0, r1
+{$else}
   stmfd sp!, {lr}
   ands r12, r0, #1<<31       (* r12:=r0 and $80000000 *)
   rsbmi r0, r0, #0           (* if signed(r0) then r0:=0-r0 *)
@@ -231,7 +135,7 @@ asm
   rsbcs r0, r0, #0
   rsbmi r1, r1, #0
   ldmfd sp!, {pc}
-  {$endif}
+{$endif}
 end;
 
 {It is a compilerproc (systemh.inc), make an alias for internal use.}
@@ -243,10 +147,16 @@ function fpc_div_longint(n,z:longint):longint;external name 'FPC_DIV_LONGINT';
 function fpc_mod_dword(n,z:dword):dword;[public,alias: 'FPC_MOD_DWORD'];assembler;nostackframe;
 
 asm
+{$if defined(CPUARMV7M)} (* hardware divide path; NOTE(review): no explicit division-by-zero check here - confirm this is intended *)
+  udiv r2, r1, r0            (* r2:=z div n  (r1 = z is the dividend, r0 = n the divisor, as in the software path) *)
+  mul r2, r0, r2             (* r2:=n*(z div n) *)
+  sub r0, r1, r2             (* r0:=z-n*(z div n), i.e. z mod n *)
+{$else}
   stmfd sp!, {lr}
   bl fpc_div_dword
   mov r0, r1
   ldmfd sp!, {pc}
+{$endif} (* the software path above returns the remainder that fpc_div_dword leaves in r1 *)
 end;
 
 {It is a compilerproc (systemh.inc), make an alias for internal use.}
@@ -258,10 +168,16 @@ function fpc_mod_dword(n,z:dword):dword;external name 'FPC_MOD_DWORD';
 function fpc_mod_longint(n,z:longint):longint;[public,alias: 'FPC_MOD_LONGINT'];assembler;nostackframe;
 
 asm
+{$if defined(CPUARMV7M)} (* hardware divide path; NOTE(review): no explicit division-by-zero check here - confirm this is intended *)
+  sdiv r2, r1, r0            (* r2:=z div n  (r1 = z is the dividend, r0 = n the divisor, as in the software path) *)
+  mul r2, r0, r2             (* r2:=n*(z div n); only the low 32 bits are needed, so mul suffices and r3 is left untouched *)
+  sub r0, r1, r2             (* r0:=z-n*(z div n), i.e. z mod n *)
+{$else}
   stmfd sp!, {lr}
   bl fpc_div_longint
   mov r0, r1
   ldmfd sp!, {pc}
+{$endif} (* the software path above returns the value fpc_div_longint leaves in r1 *)
 end;
 
 {It is a compilerproc (systemh.inc), make an alias for internal use.}