Browse Source

+ in_min/max_single/double support for aarch64

florian 3 years ago
parent
commit
77b9d62520
2 changed files with 71 additions and 4 deletions
  1. 64 0
      compiler/aarch64/ncpuinl.pas
  2. 7 4
      compiler/nflw.pas

+ 64 - 0
compiler/aarch64/ncpuinl.pas

@@ -38,6 +38,7 @@ interface
         function first_int_real: tnode; override;
         function first_frac_real: tnode; override;
         function first_fma : tnode; override;
+        function first_minmax : tnode; override;
         procedure second_abs_real; override;
         procedure second_sqr_real; override;
         procedure second_sqrt_real; override;
@@ -49,6 +50,7 @@ interface
         procedure second_get_frame; override;
         procedure second_fma; override;
         procedure second_prefetch; override;
+        procedure second_minmax; override;
       private
         procedure load_fpu_location;
       end;
@@ -58,6 +60,7 @@ implementation
 
     uses
       globtype,verbose,globals,
+      compinnr,
       cpuinfo, defutil,symdef,aasmdata,aasmcpu,
       cgbase,cgutils,pass_1,pass_2,
       ncal,nutils,
@@ -347,6 +350,67 @@ implementation
       end;
 
 
+    { First pass for the min/max inline nodes.
+      When the result type is single or double the node is handled by this
+      backend: the expected location is an MM register and nil is returned.
+      Every other result type is delegated to the inherited implementation. }
+    function taarch64inlinenode.first_minmax : tnode;
+      begin
+        { anything that is not a single/double result goes to the generic code }
+        if not(is_single(resultdef) or is_double(resultdef)) then
+          begin
+            Result:=inherited first_minmax;
+            exit;
+          end;
+
+        { floating point min/max is generated inline by second_minmax }
+        Result:=nil;
+        expectloc:=LOC_MMREGISTER;
+      end;
+
+
+    { Second pass (code generation) for in_min_single/double and
+      in_max_single/double.
+
+      Both parameters are evaluated and forced into MM registers, a fresh MM
+      register is allocated for the result, and the sequence
+        FCMP  p1, p2
+        FCSEL res, p1, p2, <cond>
+      is emitted, where <cond> is C_MI for min and C_GT for max.
+
+      Raises internalerror 2021121802 for any other inline number and
+      internalerror 2021121801 when the result type is neither single nor
+      double. }
+    procedure taarch64inlinenode.second_minmax;
+      var
+        paraarray : array[1..2] of tnode;
+        i: Integer;
+      begin
+         if is_single(resultdef) or is_double(resultdef) then
+           begin
+             { paraarray[1] is taken from the second node of the parameter
+               chain, paraarray[2] from the first one
+               NOTE(review): presumably the chain stores the parameters in
+               reverse source order - confirm against tcallparanode }
+             paraarray[1]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
+             paraarray[2]:=tcallparanode(parameters).paravalue;
+
+             for i:=low(paraarray) to high(paraarray) do
+               secondpass(paraarray[i]);
+
+             { no memory operand is allowed: FCMP/FCSEL below use
+               location.register of both operands as MM registers, so every
+               operand that is not already in an MM register (memory,
+               constant, integer register, ...) has to be moved into one }
+             for i:=low(paraarray) to high(paraarray) do
+               begin
+                 if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
+                   hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,
+                     paraarray[i].resultdef,true);
+               end;
+
+             { the result lives in a newly allocated MM register of the same
+               size as the first operand }
+             location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
+             location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+
+             { set the condition flags from comparing operand 1 with operand 2 }
+             current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCMP,
+               paraarray[1].location.register,paraarray[2].location.register));
+
+             { FCSEL copies operand 1 into the result if the condition holds,
+               operand 2 otherwise }
+             case inlinenumber of
+               in_min_single,
+               in_min_double:
+                 current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_cond(A_FCSEL,
+                   location.register,paraarray[1].location.register,paraarray[2].location.register,C_MI));
+               in_max_single,
+               in_max_double:
+                 current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_cond(A_FCSEL,
+                   location.register,paraarray[1].location.register,paraarray[2].location.register,C_GT));
+               else
+                 Internalerror(2021121802);
+             end;
+
+             cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+           end
+         else
+           internalerror(2021121801);
+      end;
+
+
 begin
   cinlinenode:=taarch64inlinenode;
 end.

+ 7 - 4
compiler/nflw.pas

@@ -1559,12 +1559,12 @@ implementation
 
 
     function tifnode.internalsimplify(warn: boolean) : tnode;
-{$if defined(i386) or defined(x86_64) or defined(xtensa)}
+{$if defined(i386) or defined(x86_64) or defined(xtensa) or defined(aarch64)}
       var
         thenstmnt, elsestmnt: tnode;
         in_nr: tinlinenumber;
         paratype: tdef;
-{$endif}
+{$endif defined(i386) or defined(x86_64) or defined(xtensa) or defined(aarch64)}
       begin
         result:=nil;
         { optimize constant expressions }
@@ -1592,7 +1592,7 @@ implementation
                end;
           end;
 {$ifndef llvm}
-{$if defined(i386) or defined(x86_64) or defined(xtensa)}
+{$if defined(i386) or defined(x86_64) or defined(xtensa) or defined(aarch64)}
         { use min/max intrinsic?
           convert (with <op> being <, >, >=, <=
           if a <op> b then
@@ -1628,6 +1628,9 @@ implementation
 {$if defined(xtensa)}
           (CPUXTENSA_HAS_MINMAX in cpu_capabilities[current_settings.cputype]) and is_32bitint(tassignmentnode(thenstmnt).right.resultdef) and
 {$endif defined(xtensa)}
+{$if defined(aarch64)}
+          (is_single(tassignmentnode(thenstmnt).left.resultdef) or is_double(tassignmentnode(thenstmnt).left.resultdef)) and
+{$endif defined(aarch64)}
           (
           { the right side of the assignment in the then clause must either }
 
@@ -1702,7 +1705,7 @@ implementation
                       ccallparanode.create(tassignmentnode(thenstmnt).right.getcopy,nil)))
                 );
           end;
-{$endif defined(i386) or defined(x86_64) or defined(xtensa)}
+{$endif defined(i386) or defined(x86_64) or defined(xtensa) or defined(aarch64)}
 {$endif llvm}
       end;