Browse Source

+ FMA support for aaarch64

git-svn-id: trunk@42885 -
florian 6 years ago
parent
commit
9c00a8b616
4 changed files with 124 additions and 0 deletions
  1. 1 0
      .gitattributes
  2. 7 0
      compiler/aarch64/ncpuadd.pas
  3. 90 0
      compiler/aarch64/ncpuinl.pas
  4. 26 0
      tests/test/tfma1a64.pp

+ 1 - 0
.gitattributes

@@ -14406,6 +14406,7 @@ tests/test/tfillchr.pp svneol=native#text/plain
 tests/test/tfinal1.pp svneol=native#text/pascal
 tests/test/tfinal2.pp svneol=native#text/pascal
 tests/test/tfma1.inc svneol=native#text/plain
+tests/test/tfma1a64.pp svneol=native#text/pascal
 tests/test/tfma1arm.pp svneol=native#text/pascal
 tests/test/tfma1x86.pp svneol=native#text/pascal
 tests/test/tforin1.pp svneol=native#text/pascal

+ 7 - 0
compiler/aarch64/ncpuadd.pas

@@ -34,6 +34,7 @@ interface
           function  GetResFlags(unsigned:Boolean):TResFlags;
           function  GetFPUResFlags:TResFlags;
        protected
+          function use_fma : boolean;override;
           procedure second_addfloat;override;
           procedure second_cmpfloat;override;
           procedure second_cmpboolean;override;
@@ -62,6 +63,12 @@ interface
                                taarch64addnode
 *****************************************************************************}
 
+    function taarch64addnode.use_fma : boolean;
+      begin
+        Result:=true;
+      end;
+
+
     function taarch64addnode.GetResFlags(unsigned:Boolean):TResFlags;
       begin
         case NodeType of

+ 90 - 0
compiler/aarch64/ncpuinl.pas

@@ -35,6 +35,7 @@ interface
         function first_sqrt_real: tnode; override;
         function first_round_real: tnode; override;
         function first_trunc_real: tnode; override;
+        function first_fma : tnode; override;
         procedure second_abs_real; override;
         procedure second_sqr_real; override;
         procedure second_sqrt_real; override;
@@ -42,6 +43,7 @@ interface
         procedure second_round_real; override;
         procedure second_trunc_real; override;
         procedure second_get_frame; override;
+        procedure second_fma; override;
       private
         procedure load_fpu_location;
       end;
@@ -53,6 +55,7 @@ implementation
       globtype,verbose,globals,
       cpuinfo, defutil,symdef,aasmdata,aasmcpu,
       cgbase,cgutils,pass_1,pass_2,
+      ncal,
       cpubase,ncgutil,cgobj,cgcpu, hlcgobj;
 
 {*****************************************************************************
@@ -104,6 +107,17 @@ implementation
       end;
 
 
+     function taarch64inlinenode.first_fma : tnode;
+       begin
+         if ((is_double(resultdef)) or (is_single(resultdef))) then
+           begin
+             expectloc:=LOC_MMREGISTER;
+             Result:=nil;
+           end
+         else
+           Result:=inherited first_fma;
+       end;
+
     procedure taarch64inlinenode.second_abs_real;
       begin
         load_fpu_location;
@@ -178,6 +192,82 @@ implementation
         location.register:=NR_FRAME_POINTER_REG;
       end;
 
+
+    procedure taarch64inlinenode.second_fma;
+      const
+        op : array[false..true,false..true] of TAsmOp =
+          { positive product }
+          (
+           { positive third operand }
+           (A_FMADD,
+           { negative third operand }
+            A_FNMSUB),
+           { negative product }
+            { positive third operand }
+            (A_FMSUB,
+             A_FNMADD)
+           );
+
+      var
+        paraarray : array[1..3] of tnode;
+        i : integer;
+        negop3,
+        negproduct : boolean;
+      begin
+        negop3:=false;
+        negproduct:=false;
+        paraarray[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;
+        paraarray[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
+        paraarray[3]:=tcallparanode(parameters).paravalue;
+
+        { check if a neg. node can be removed
+          this is possible because changing the sign of
+          a floating point number does not affect its absolute
+          value in any way
+        }
+        if paraarray[1].nodetype=unaryminusn then
+          begin
+            paraarray[1]:=tunarynode(paraarray[1]).left;
+            { do not release the unused unary minus node, it is kept and release together with the other nodes,
+              only no code is generated for it }
+            negproduct:=not(negproduct);
+          end;
+
+        if paraarray[2].nodetype=unaryminusn then
+          begin
+            paraarray[2]:=tunarynode(paraarray[2]).left;
+            { do not release the unused unary minus node, it is kept and release together with the other nodes,
+              only no code is generated for it }
+            negproduct:=not(negproduct);
+          end;
+
+        if paraarray[3].nodetype=unaryminusn then
+          begin
+            paraarray[3]:=tunarynode(paraarray[3]).left;
+            { do not release the unused unary minus node, it is kept and release together with the other nodes,
+              only no code is generated for it }
+            negop3:=true;
+          end;
+
+         for i:=1 to 3 do
+          secondpass(paraarray[i]);
+
+        { no memory operand is allowed }
+        for i:=1 to 3 do
+          begin
+            if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
+          end;
+
+        location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
+        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_reg(op[negproduct,negop3],
+          location.register,paraarray[1].location.register,paraarray[2].location.register,paraarray[3].location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+      end;
+
+
 begin
   cinlinenode:=taarch64inlinenode;
 end.

+ 26 - 0
tests/test/tfma1a64.pp

@@ -0,0 +1,26 @@
+{ %CPU=aarch64 }
+
+{$i tfma1.inc}  
+  
+begin
+  d1:=2;
+  d2:=3;
+  d3:=4;
+  d0:=FMADouble(d1,d2,d3);
+  writeln(d0);
+  if d0<>10.0 then
+    halt(1);
+
+  s1:=2;
+  s2:=3;
+  s3:=4;
+  s0:=FMASingle(s1,s2,s3);
+  writeln(s0);
+  if s0<>10.0 then
+    halt(1);
+
+  testsingle;
+  testdouble;
+
+  writeln('ok');
+end.