Browse Source

* use constrained LLVM floating-point intrinsics for the add/sub/mul/slash
(real division)/fma/sqrt operations when fastmath is not enabled

git-svn-id: trunk@43819 -

Jonas Maebe 5 years ago
parent
commit
797077855e
3 changed files with 204 additions and 96 deletions
  1. 100 65
      compiler/llvm/nllvmadd.pas
  2. 73 29
      compiler/llvm/nllvminl.pas
  3. 31 2
      rtl/inc/llvmintr.inc

+ 100 - 65
compiler/llvm/nllvmadd.pas

@@ -47,20 +47,75 @@ interface
 implementation
 implementation
 
 
      uses
      uses
-       verbose,globtype,
+       verbose,globtype,globals,cutils,
        aasmdata,
        aasmdata,
        symconst,symtype,symdef,defutil,
        symconst,symtype,symdef,defutil,
        llvmbase,aasmllvm,
        llvmbase,aasmllvm,
-       cgbase,cgutils,
+       cgbase,cgutils,pass_1,
        hlcgobj,
        hlcgobj,
-       nadd
+       nadd,ncal,ncnv,ncon
        ;
        ;
 
 
 { tllvmaddnode }
 { tllvmaddnode }
 
 
   function tllvmaddnode.pass_1: tnode;
   function tllvmaddnode.pass_1: tnode;
+    var
+      intrname: string;
+      iscompcurrency: boolean;
     begin
     begin
       result:=inherited pass_1;
       result:=inherited pass_1;
+      if not assigned(result) and
+         is_fpu(left.resultdef) and
+         not(cs_opt_fastmath in current_settings.optimizerswitches) then
+        begin
+          case nodetype of
+            addn:
+              begin
+                intrname:='LLVM_EXPERIMENTAL_CONSTRAINED_FADD';
+              end;
+            subn:
+              begin
+                intrname:='LLVM_EXPERIMENTAL_CONSTRAINED_FSUB';
+              end;
+            muln:
+              begin
+                intrname:='LLVM_EXPERIMENTAL_CONSTRAINED_FMUL';
+              end;
+            slashn:
+              begin
+                intrname:='LLVM_EXPERIMENTAL_CONSTRAINED_FDIV';
+              end;
+            else
+              begin
+                intrname:='';
+              end;
+          end;
+          if intrname<>'' then
+            begin
+              iscompcurrency:=tfloatdef(left.resultdef).floattype in [s64currency,s64comp];
+              if iscompcurrency then
+                begin
+                  inserttypeconv_internal(left,s80floattype);
+                  inserttypeconv_internal(right,s80floattype);
+                end;
+              result:=ccallnode.createintern(intrname,
+                ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('fpexcept.strict'),length('fpexcept.strict'),llvm_metadatatype),
+                  ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('round.dynamic'),length('round.dynamic'),llvm_metadatatype),
+                    ccallparanode.create(right,
+                      ccallparanode.create(left,nil)
+                    )
+                  )
+                )
+              );
+              if iscompcurrency then
+                begin
+                  result:=ctypeconvnode.create_internal(result,resultdef);
+                end;
+              left:=nil;
+              right:=nil;
+              exit;
+            end;
+        end;
       { there are no flags in LLVM }
       { there are no flags in LLVM }
       if expectloc=LOC_FLAGS then
       if expectloc=LOC_FLAGS then
         expectloc:=LOC_REGISTER;
         expectloc:=LOC_REGISTER;
@@ -225,51 +280,10 @@ implementation
       tmpreg: tregister;
       tmpreg: tregister;
       op    : tllvmop;
       op    : tllvmop;
       llvmfpcmp : tllvmfpcmp;
       llvmfpcmp : tllvmfpcmp;
-      size : tdef;
-      cmpop,
-      singleprec : boolean;
+      size  : tdef;
     begin
     begin
       pass_left_right;
       pass_left_right;
 
 
-      cmpop:=false;
-      singleprec:=tfloatdef(left.resultdef).floattype=s32real;
-      { avoid uninitialised warning }
-      llvmfpcmp:=lfc_invalid;
-      case nodetype of
-        addn :
-          op:=la_fadd;
-        muln :
-          op:=la_fmul;
-        subn :
-          op:=la_fsub;
-        slashn :
-          op:=la_fdiv;
-        ltn,lten,gtn,gten,
-        equaln,unequaln :
-          begin
-            op:=la_fcmp;
-            cmpop:=true;
-            case nodetype of
-              ltn:
-                llvmfpcmp:=lfc_olt;
-              lten:
-                llvmfpcmp:=lfc_ole;
-              gtn:
-                llvmfpcmp:=lfc_ogt;
-              gten:
-                llvmfpcmp:=lfc_oge;
-              equaln:
-                llvmfpcmp:=lfc_oeq;
-              unequaln:
-                llvmfpcmp:=lfc_une;
-              else
-                internalerror(2015031506);
-            end;
-          end;
-        else
-          internalerror(2013102401);
-      end;
-
       { get the operands in the correct order; there are no special cases here,
       { get the operands in the correct order; there are no special cases here,
         everything is register-based }
         everything is register-based }
       if nf_swapped in flags then
       if nf_swapped in flags then
@@ -279,37 +293,58 @@ implementation
       hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
       hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
       hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
       hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
 
 
-      { initialize the result location }
-      if not cmpop then
-        begin
-          location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
-          location.register:=hlcg.getfpuregister(current_asmdata.CurrAsmList,resultdef);
-        end
-      else
-        begin
-          location_reset(location,LOC_REGISTER,OS_8);
-          location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,llvmbool1type);
-        end;
-
       { see comment in thlcgllvm.a_loadfpu_ref_reg }
       { see comment in thlcgllvm.a_loadfpu_ref_reg }
       if tfloatdef(left.resultdef).floattype in [s64comp,s64currency] then
       if tfloatdef(left.resultdef).floattype in [s64comp,s64currency] then
         size:=sc80floattype
         size:=sc80floattype
       else
       else
         size:=left.resultdef;
         size:=left.resultdef;
 
 
-      { emit the actual operation }
-      if not cmpop then
-        begin
-          current_asmdata.CurrAsmList.concat(taillvm.op_reg_size_reg_reg(op,location.register,size,
-            left.location.register,right.location.register))
-        end
-      else
+      if nodetype in [ltn,lten,gtn,gten,equaln,unequaln] then
         begin
         begin
-          current_asmdata.CurrAsmList.concat(taillvm.op_reg_fpcond_size_reg_reg(op,
+          case nodetype of
+            ltn:
+              llvmfpcmp:=lfc_olt;
+            lten:
+              llvmfpcmp:=lfc_ole;
+            gtn:
+              llvmfpcmp:=lfc_ogt;
+            gten:
+              llvmfpcmp:=lfc_oge;
+            equaln:
+              llvmfpcmp:=lfc_oeq;
+            unequaln:
+              llvmfpcmp:=lfc_une;
+            else
+              internalerror(2015031506);
+          end;
+          location_reset(location,LOC_REGISTER,OS_8);
+          location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,llvmbool1type);
+
+          current_asmdata.CurrAsmList.concat(taillvm.op_reg_fpcond_size_reg_reg(la_fcmp ,
             location.register,llvmfpcmp,size,left.location.register,right.location.register));
             location.register,llvmfpcmp,size,left.location.register,right.location.register));
           tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
           tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
           hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,llvmbool1type,resultdef,location.register,tmpreg);
           hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,llvmbool1type,resultdef,location.register,tmpreg);
           location.register:=tmpreg;
           location.register:=tmpreg;
+        end
+      else
+        begin
+          case nodetype of
+            addn :
+              op:=la_fadd;
+            muln :
+              op:=la_fmul;
+            subn :
+              op:=la_fsub;
+            slashn :
+              op:=la_fdiv;
+            else
+              internalerror(2013102401);
+          end;
+          location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
+          location.register:=hlcg.getfpuregister(current_asmdata.CurrAsmList,resultdef);
+
+          current_asmdata.CurrAsmList.concat(taillvm.op_reg_size_reg_reg(op,location.register,size,
+            left.location.register,right.location.register))
         end;
         end;
     end;
     end;
 
 

+ 73 - 29
compiler/llvm/nllvminl.pas

@@ -52,7 +52,7 @@ interface
 implementation
 implementation
 
 
      uses
      uses
-       verbose,globals,globtype,constexp,
+       verbose,globals,globtype,constexp,cutils,
        aasmbase, aasmdata,
        aasmbase, aasmdata,
        symconst,symtype,symdef,defutil,
        symconst,symtype,symdef,defutil,
        compinnr,
        compinnr,
@@ -219,21 +219,43 @@ implementation
 
 
     function tllvminlinenode.first_fma: tnode;
     function tllvminlinenode.first_fma: tnode;
       var
       var
-        procname: string[15];
+        procname: string[40];
       begin
       begin
-        case inlinenumber of
-          in_fma_single:
-            procname:='llvm_fma_f32';
-          in_fma_double:
-            procname:='llvm_fma_f64';
-          in_fma_extended:
-            procname:='llvm_fma_f80';
-          in_fma_float128:
-            procname:='llvm_fma_f128';
-          else
-            internalerror(2018122101);
-        end;
-        result:=ccallnode.createintern(procname,left);
+        if cs_opt_fastmath in current_settings.optimizerswitches then
+          begin
+            case inlinenumber of
+              in_fma_single:
+                procname:='llvm_fma_f32';
+              in_fma_double:
+                procname:='llvm_fma_f64';
+              in_fma_extended:
+                procname:='llvm_fma_f80';
+              in_fma_float128:
+                procname:='llvm_fma_f128';
+              else
+                internalerror(2018122101);
+            end;
+            result:=ccallnode.createintern(procname,left);
+          end
+        else
+          begin
+            case inlinenumber of
+              in_fma_single,
+              in_fma_double,
+              in_fma_extended,
+              in_fma_float128:
+                procname:='LLVM_EXPERIMENTAL_CONSTRAINED_FMA';
+              else
+                internalerror(2019122811);
+            end;
+            result:=ccallnode.createintern(procname,
+              ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('fpexcept.strict'),length('fpexcept.strict'),llvm_metadatatype),
+                ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('round.dynamic'),length('round.dynamic'),llvm_metadatatype),
+                  left
+                )
+              )
+            );
+          end;
         left:=nil;
         left:=nil;
       end;
       end;
 
 
@@ -250,23 +272,45 @@ implementation
 
 
     function tllvminlinenode.first_sqrt_real: tnode;
     function tllvminlinenode.first_sqrt_real: tnode;
       var
       var
-        intrinsic: string[20];
+        intrinsic: string[40];
       begin
       begin
         if left.resultdef.typ<>floatdef then
         if left.resultdef.typ<>floatdef then
           internalerror(2018121601);
           internalerror(2018121601);
-        case tfloatdef(left.resultdef).floattype of
-          s32real:
-            intrinsic:='llvm_sqrt_f32';
-          s64real:
-            intrinsic:='llvm_sqrt_f64';
-          s80real,sc80real:
-            intrinsic:='llvm_sqrt_f80';
-          s128real:
-            intrinsic:='llvm_sqrt_f128';
-          else
-            internalerror(2018121602);
-        end;
-        result:=ccallnode.createintern(intrinsic, ccallparanode.create(left,nil));
+        if cs_opt_fastmath in current_settings.optimizerswitches then
+          begin
+            case tfloatdef(left.resultdef).floattype of
+              s32real:
+                intrinsic:='llvm_sqrt_f32';
+              s64real:
+                intrinsic:='llvm_sqrt_f64';
+              s80real,sc80real:
+                intrinsic:='llvm_sqrt_f80';
+              s128real:
+                intrinsic:='llvm_sqrt_f128';
+              else
+                internalerror(2018121602);
+            end;
+            result:=ccallnode.createintern(intrinsic, ccallparanode.create(left,nil));
+          end
+        else
+          begin
+            case tfloatdef(left.resultdef).floattype of
+              s32real,
+              s64real,
+              s80real,sc80real,
+              s128real:
+                intrinsic:='LLVM_EXPERIMENTAL_CONSTRAINED_SQRT';
+              else
+                internalerror(2019122810);
+            end;
+            result:=ccallnode.createintern(intrinsic,
+              ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('fpexcept.strict'),length('fpexcept.strict'),llvm_metadatatype),
+                ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('round.dynamic'),length('round.dynamic'),llvm_metadatatype),
+                  ccallparanode.create(left,nil)
+                )
+              )
+            );
+          end;
         left:=nil;
         left:=nil;
       end;
       end;
 
 

+ 31 - 2
rtl/inc/llvmintr.inc

@@ -41,19 +41,48 @@ function llvm_ctpop(src: UInt32): UInt32; external name 'llvm.ctpop.i32';
 function llvm_ctpop(src: UInt64): UInt64; external name 'llvm.ctpop.i64';
 function llvm_ctpop(src: UInt64): UInt64; external name 'llvm.ctpop.i64';
 
 
 function llvm_sqrt_f32(val: single): single; compilerproc; external name 'llvm.sqrt.f32';
 function llvm_sqrt_f32(val: single): single; compilerproc; external name 'llvm.sqrt.f32';
+function llvm_experimental_constrained_sqrt(val: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.sqrt.f32';
 function llvm_sqrt_f64(val: double): double; compilerproc; external name 'llvm.sqrt.f64';
 function llvm_sqrt_f64(val: double): double; compilerproc; external name 'llvm.sqrt.f64';
+function llvm_experimental_constrained_sqrt(val: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.sqrt.f64';
 {$ifdef SUPPORT_EXTENDED}
 {$ifdef SUPPORT_EXTENDED}
-function llvm_sqrt_f80(val: extended): extended; compilerproc; external name 'llvm.sqrt.f80';
+function llvm_sqrt_f80(val: cextended): cextended; compilerproc; external name 'llvm.sqrt.x86_fp80';
+function llvm_experimental_constrained_sqrt(val: cextended; rounding, exceptions: LLVMMetadata): cextended; external name 'llvm.experimental.constrained.sqrt.x86_fp80';
 {$endif}
 {$endif}
 {$ifdef SUPPORT_FLOAT128}
 {$ifdef SUPPORT_FLOAT128}
 function llvm_sqrt_f128(val: float128): float128; compilerproc; external name 'llvm.sqrt.f128';
 function llvm_sqrt_f128(val: float128): float128; compilerproc; external name 'llvm.sqrt.f128';
+function llvm_experimental_constrained_sqrt(val: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.sqrt.f128';
 {$endif}
 {$endif}
 
 
 function llvm_fma_f32(a, b, c: single): single; compilerproc; external name 'llvm.fma.f32';
 function llvm_fma_f32(a, b, c: single): single; compilerproc; external name 'llvm.fma.f32';
+function llvm_experimental_constrained_fma(a, b, c: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fma.f32';
 function llvm_fma_f64(a, b, c: double): double; compilerproc; external name 'llvm.fma.f64';
 function llvm_fma_f64(a, b, c: double): double; compilerproc; external name 'llvm.fma.f64';
+function llvm_experimental_constrained_fma(a, b, c: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fma.f64';
 {$ifdef SUPPORT_EXTENDED}
 {$ifdef SUPPORT_EXTENDED}
-function llvm_fma_f80(a, b, c: extended): extended; compilerproc; external name 'llvm.fma.f80';
+function llvm_fma_f80(a, b, c: cextended): cextended; compilerproc; external name 'llvm.fma.x86_fp80';
+function llvm_experimental_constrained_fma(a, b, c: cextended; rounding, exceptions: LLVMMetadata): cextended; external name 'llvm.experimental.constrained.fma.x86_fp80';
 {$endif}
 {$endif}
 {$ifdef SUPPORT_FLOAT128}
 {$ifdef SUPPORT_FLOAT128}
 function llvm_fma_f128(a, b, c: float128): float128; compilerproc; external name 'llvm.fma.f128';
 function llvm_fma_f128(a, b, c: float128): float128; compilerproc; external name 'llvm.fma.f128';
+function llvm_experimental_constrained_fma(a, b, c: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fma.f128';
+{$endif}
+
+function llvm_experimental_constrained_fadd(a, b: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fadd.f32';
+function llvm_experimental_constrained_fsub(a, b: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fsub.f32';
+function llvm_experimental_constrained_fmul(a, b: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fmul.f32';
+function llvm_experimental_constrained_fdiv(a, b: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fdiv.f32';
+function llvm_experimental_constrained_fadd(a, b: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fadd.f64';
+function llvm_experimental_constrained_fsub(a, b: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fsub.f64';
+function llvm_experimental_constrained_fmul(a, b: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fmul.f64';
+function llvm_experimental_constrained_fdiv(a, b: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fdiv.f64';
+{$ifdef SUPPORT_EXTENDED}
+function llvm_experimental_constrained_fadd(a, b: extended; rounding, exceptions: LLVMMetadata): extended; external name 'llvm.experimental.constrained.fadd.x86_fp80';
+function llvm_experimental_constrained_fsub(a, b: extended; rounding, exceptions: LLVMMetadata): extended; external name 'llvm.experimental.constrained.fsub.x86_fp80';
+function llvm_experimental_constrained_fmul(a, b: extended; rounding, exceptions: LLVMMetadata): extended; external name 'llvm.experimental.constrained.fmul.x86_fp80';
+function llvm_experimental_constrained_fdiv(a, b: extended; rounding, exceptions: LLVMMetadata): extended; external name 'llvm.experimental.constrained.fdiv.x86_fp80';
+{$endif}
+{$ifdef SUPPORT_FLOAT128}
+function llvm_experimental_constrained_fadd(a, b: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fadd.f128';
+function llvm_experimental_constrained_fsub(a, b: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fsub.f128';
+function llvm_experimental_constrained_fmul(a, b: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fmul.f128';
+function llvm_experimental_constrained_fdiv(a, b: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fdiv.f128';
 {$endif}
 {$endif}