Browse Source

+ min/max optimization support for RiscV

florian 7 months ago
parent
commit
7aae7a8d51
2 changed files with 102 additions and 28 deletions
  1. 11 2
      compiler/nflw.pas
  2. 91 26
      compiler/riscv/nrvinl.pas

+ 11 - 2
compiler/nflw.pas

@@ -304,9 +304,9 @@ implementation
     {$ifdef i8086}
     {$ifdef i8086}
       cpuinfo,
       cpuinfo,
     {$endif i8086}
     {$endif i8086}
-    {$if defined(xtensa) or defined(i386)}
+    {$if defined(xtensa) or defined(i386) or defined(riscv)}
       cpuinfo,
       cpuinfo,
-    {$endif defined(xtensa) or defined(i386)}
+    {$endif defined(xtensa) or defined(i386) or defined(riscv)}
       cgbase,procinfo
       cgbase,procinfo
       ;
       ;
 
 
@@ -1654,6 +1654,15 @@ implementation
           (is_single(tassignmentnode(thenstmnt).left.resultdef) or is_double(tassignmentnode(thenstmnt).left.resultdef) or
           (is_single(tassignmentnode(thenstmnt).left.resultdef) or is_double(tassignmentnode(thenstmnt).left.resultdef) or
            is_32bitint(tassignmentnode(thenstmnt).left.resultdef) or is_64bitint(tassignmentnode(thenstmnt).left.resultdef)) and
            is_32bitint(tassignmentnode(thenstmnt).left.resultdef) or is_64bitint(tassignmentnode(thenstmnt).left.resultdef)) and
 {$endif defined(aarch64)}
 {$endif defined(aarch64)}
+{$if defined(riscv)}
+          { RiscV fmin/fmax/fminm/fmaxm use the IEEE semantics (2008 or 201x) of min/max regarding NaN (always returning
+            either the NaN or the non-NaN operand instead of the second one in case one is NaN), so
+            we can use them only when fast math is on }
+          ((cs_opt_fastmath in current_settings.optimizerswitches) and
+           ((is_single(tassignmentnode(thenstmnt).left.resultdef) and (CPURV_HAS_F in cpu_capabilities[current_settings.cputype])) or
+            (is_double(tassignmentnode(thenstmnt).left.resultdef) and (CPURV_HAS_D in cpu_capabilities[current_settings.cputype])) or
+            (is_quad(tassignmentnode(thenstmnt).left.resultdef) and (CPURV_HAS_Q in cpu_capabilities[current_settings.cputype])))) and
+{$endif defined(riscv)}
           (
           (
           { the right size of the assignment in the then clause must either }
           { the right size of the assignment in the then clause must either }
 
 

+ 91 - 26
compiler/riscv/nrvinl.pas

@@ -30,38 +30,37 @@ interface
        node,ninl,ncginl;
        node,ninl,ncginl;
 
 
     type
     type
-
-       { trvinlinenode }
-
-       trvinlinenode = class(tcginlinenode)
-          { first pass override
-            so that the code generator will actually generate
-            these nodes.
-          }
-          function first_sqrt_real: tnode; override;
-          function first_abs_real: tnode; override;
-          function first_sqr_real: tnode; override;
-          function first_round_real: tnode; override;
-          function first_trunc_real: tnode; override;
-
-          function first_fma: tnode; override;
-
-          procedure second_sqrt_real; override;
-          procedure second_abs_real; override;
-          procedure second_sqr_real; override;
-          procedure second_round_real; override;
-          procedure second_trunc_real; override;
-
-          procedure second_fma; override;
-       protected
-          procedure load_fpu_location;
-       end;
+      trvinlinenode = class(tcginlinenode)
+        { first pass override
+          so that the code generator will actually generate
+          these nodes.
+
+          Each first_* method below has a matching second_* method
+          that performs the second pass for the same inline node.
+        }
+        function first_sqrt_real: tnode; override;
+        function first_abs_real: tnode; override;
+        function first_sqr_real: tnode; override;
+        function first_round_real: tnode; override;
+        function first_trunc_real: tnode; override;
+        function first_fma: tnode; override;
+        function first_minmax: tnode; override; { keeps single/double/quad min/max inline, otherwise calls inherited }
+
+        procedure second_sqrt_real; override;
+        procedure second_abs_real; override;
+        procedure second_sqr_real; override;
+        procedure second_round_real; override;
+        procedure second_trunc_real; override;
+
+        procedure second_fma; override;
+        procedure second_minmax; override; { emits the fmin/fmax instruction for the min/max inline node }
+      protected
+        procedure load_fpu_location;
+      end;
 
 
 implementation
 implementation
 
 
     uses
     uses
       ncal,
       ncal,
       cutils,globals,verbose,globtype,
       cutils,globals,verbose,globtype,
+      compinnr,
       aasmtai,aasmdata,aasmcpu,
       aasmtai,aasmdata,aasmcpu,
       symconst,symdef,
       symconst,symdef,
       defutil,
       defutil,
@@ -159,6 +158,20 @@ implementation
        end;
        end;
 
 
 
 
+    { First pass of the min/max inline node.
+
+      For single, double and quad results the node is kept (nil is returned)
+      and its result is expected in an FPU register; every other result type
+      is handed to the inherited implementation. }
+    function trvinlinenode.first_minmax : tnode;
+      begin
+        if not(is_single(resultdef) or is_double(resultdef) or is_quad(resultdef)) then
+          begin
+            { not a single/double/quad result: let the generic code decide }
+            Result:=inherited first_minmax;
+            exit;
+          end;
+
+        Result:=nil;
+        expectloc:=LOC_FPUREGISTER;
+        { when FPU exceptions have to be checked, the procedure is flagged
+          as performing a call }
+        if needs_check_for_fpu_exceptions then
+          Include(current_procinfo.flags,pi_do_call);
+      end;
+
+
     { load the FPU into an fpu register }
     { load the FPU into an fpu register }
      procedure trvinlinenode.load_fpu_location;
      procedure trvinlinenode.load_fpu_location;
        begin
        begin
@@ -376,6 +389,58 @@ implementation
        end;
        end;
 
 
 
 
+    { Second pass of the min/max inline node for floating point operands.
+
+      Evaluates both parameters, forces them into FPU registers, and emits a
+      single three-register fmin/fmax instruction whose width is selected by
+      inlinenumber. The result is placed in a freshly allocated FPU register.
+
+      Raises internalerror 2025010501 if the result type is not single,
+      double or quad, and internalerror 2025010502 if inlinenumber is not
+      one of the min/max inline numbers handled here. }
+    procedure trvinlinenode.second_minmax;
+      var
+        paraarray : array[1..2] of tnode;
+        i: Integer;
+        opcode: TAsmOp;
+      begin
+        { the first operand is the second entry of the parameter chain,
+          the second operand is the head of the chain }
+        paraarray[1]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
+        paraarray[2]:=tcallparanode(parameters).paravalue;
+
+        for i:=low(paraarray) to high(paraarray) do
+           secondpass(paraarray[i]);
+
+        { first_minmax accepts quad as well, so it must be accepted here too }
+        if is_single(resultdef) or is_double(resultdef) or is_quad(resultdef) then
+           begin
+             { no memory operand is allowed }
+             for i:=low(paraarray) to high(paraarray) do
+               begin
+                 if not(paraarray[i].location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
+                   hlcg.location_force_fpureg(current_asmdata.CurrAsmList,paraarray[i].location,
+                     paraarray[i].resultdef,true);
+               end;
+
+             location_reset(location,LOC_FPUREGISTER,paraarray[1].location.size);
+             location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
+
+             case inlinenumber of
+               in_min_single:
+                 opcode:=A_FMIN_S;
+               in_min_double:
+                 opcode:=A_FMIN_D;
+               in_min_quad:
+                 { was A_FMAX_Q, which would have computed the maximum }
+                 opcode:=A_FMIN_Q;
+               in_max_single:
+                 opcode:=A_FMAX_S;
+               in_max_double:
+                 opcode:=A_FMAX_D;
+               in_max_quad:
+                 opcode:=A_FMAX_Q;
+               else
+                 Internalerror(2025010502);
+             end;
+             current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(opcode,
+               location.register,paraarray[1].location.register,paraarray[2].location.register));
+
+             cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+           end
+         else
+           internalerror(2025010501);
+      end;
+
 begin
 begin
    cinlinenode:=trvinlinenode;
    cinlinenode:=trvinlinenode;
 end.
 end.