Преглед на файлове

Merge commits 42525 and 45891 that add
checks for fpu exceptions for arm and aarch64.
------------------------------------------------------------------------
r42525 | florian | 2019-07-28 21:06:36 +0000 (Sun, 28 Jul 2019) | 2 lines

+ software handling of exceptions on arm
* reworked software handling of exceptions so they can be check lazily
------------------------------------------------------------------------
--- Merging r42525 into '.':
U compiler/arm/cgcpu.pas
U compiler/arm/narmadd.pas
U compiler/arm/narminl.pas
U compiler/arm/narmmat.pas
U compiler/ncgcal.pas
U compiler/procinfo.pas
U rtl/arm/arm.inc
--- Recording mergeinfo for merge of r42525 into '.':
U .
Summary of conflicts:
Tree conflicts: 1
------------------------------------------------------------------------
r42891 | florian | 2019-09-01 17:26:11 +0000 (Sun, 01 Sep 2019) | 1 line

+ support for software floating point exception handling on AArch64 (-CE)
------------------------------------------------------------------------
--- Merging r42891 into '.':
U compiler/aarch64/cgcpu.pas
U compiler/aarch64/ncpuadd.pas
U compiler/aarch64/ncpuinl.pas
U compiler/aarch64/ncpumat.pas
U rtl/aarch64/aarch64.inc
U rtl/aarch64/math.inc
U rtl/aarch64/mathu.inc
--- Recording mergeinfo for merge of r42891 into '.':
G .

git-svn-id: branches/fixes_3_2@46225 -

pierre преди 5 години
родител
ревизия
d1f31fab15

+ 36 - 0
compiler/aarch64/cgcpu.pas

@@ -100,6 +100,7 @@ interface
         procedure g_concatcopy_move(list: TAsmList; const source, dest: treference; len: tcgint);
         procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);override;
         procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint);override;
+        procedure g_check_for_fpu_exception(list: TAsmList; force, clear: boolean);override;
        private
         function save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
         procedure load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
@@ -989,6 +990,7 @@ implementation
             instr:=taicpu.op_reg_reg(A_FCVT,reg2,reg1);
           end;
         list.Concat(instr);
+        maybe_check_for_fpu_exception(list);
       end;
 
 
@@ -2212,6 +2214,40 @@ implementation
       end;
 
 
+    procedure tcgaarch64.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
+      var
+        r : TRegister;
+        ai: taicpu;
+        l1,l2: TAsmLabel;
+      begin
+        { so far, we assume all flavours of AArch64 need explicit floating point exception checking }
+        if ((cs_check_fpu_exceptions in current_settings.localswitches) and
+            (force or current_procinfo.FPUExceptionCheckNeeded)) then
+          begin
+            r:=getintregister(list,OS_INT);
+            list.concat(taicpu.op_reg_reg(A_MRS,r,NR_FPSR));
+            list.concat(taicpu.op_reg_const(A_TST,r,$1f));
+            current_asmdata.getjumplabel(l1);
+            current_asmdata.getjumplabel(l2);
+            ai:=taicpu.op_sym(A_B,l1);
+            ai.is_jmp:=true;
+            ai.condition:=C_NE;
+            list.concat(ai);
+            list.concat(taicpu.op_reg_const(A_TST,r,$80));
+            ai:=taicpu.op_sym(A_B,l2);
+            ai.is_jmp:=true;
+            ai.condition:=C_EQ;
+            list.concat(ai);
+            a_label(list,l1);
+            alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+            cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
+            dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+            a_label(list,l2);
+            if clear then
+              current_procinfo.FPUExceptionCheckNeeded:=false;
+          end;
+      end;
+
 
     procedure create_codegen;
       begin

+ 2 - 0
compiler/aarch64/ncpuadd.pas

@@ -211,6 +211,7 @@ interface
 
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,
            location.register,left.location.register,right.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
 
 
@@ -231,6 +232,7 @@ interface
         { signalling compare so we can get exceptions }
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCMPE,
              left.location.register,right.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
 
 

+ 4 - 0
compiler/aarch64/ncpuinl.pas

@@ -108,6 +108,7 @@ implementation
       begin
         load_fpu_location;
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FABS,location.register,left.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
 
 
@@ -115,6 +116,7 @@ implementation
       begin
         load_fpu_location;
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_FMUL,location.register,left.location.register,left.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
 
 
@@ -122,6 +124,7 @@ implementation
       begin
         load_fpu_location;
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FSQRT,location.register,left.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
 
 
@@ -155,6 +158,7 @@ implementation
         { convert to signed integer rounding towards zero (there's no "round to
           integer using current rounding mode") }
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCVTZS,location.register,hreg));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
 
 

+ 1 - 0
compiler/aarch64/ncpumat.pas

@@ -187,6 +187,7 @@ implementation
         location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
         location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FNEG,location.register,left.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
 
 begin

+ 42 - 1
compiler/arm/cgcpu.pas

@@ -61,6 +61,8 @@ unit cgcpu;
         procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
         procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
 
+        procedure g_check_for_fpu_exception(list : TAsmList; force,clear : boolean); override;
+
         procedure a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);override;
         {  comparison operations }
         procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
@@ -1712,6 +1714,33 @@ unit cgcpu;
        end;
 
 
+    procedure tbasecgarm.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
+      var
+        r : TRegister;
+        ai: taicpu;
+        l: TAsmLabel;
+      begin
+        if ((cs_check_fpu_exceptions in current_settings.localswitches) and
+            (force or current_procinfo.FPUExceptionCheckNeeded)) then
+          begin
+            r:=getintregister(list,OS_INT);
+            list.concat(taicpu.op_reg_reg(A_FMRX,r,NR_FPSCR));
+            list.concat(setoppostfix(taicpu.op_reg_reg_const(A_AND,r,r,$9f),PF_S));
+            current_asmdata.getjumplabel(l);
+            ai:=taicpu.op_sym(A_B,l);
+            ai.is_jmp:=true;
+            ai.condition:=C_EQ;
+            list.concat(ai);
+            alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+            cg.a_call_name(current_asmdata.CurrAsmList,'FPC_THROWFPUEXCEPTION',false);
+            dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+            a_label(list,l);
+            if clear then
+              current_procinfo.FPUExceptionCheckNeeded:=false;
+          end;
+      end;
+
+
     {  comparison operations }
     procedure tbasecgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
       l : tasmlabel);
@@ -3017,6 +3046,7 @@ unit cgcpu;
           A_VMOV:
             add_move_instruction(instr);
         end;
+        maybe_check_for_fpu_exception(list);
       end;
 
 
@@ -3078,6 +3108,7 @@ unit cgcpu;
 
         if (tmpmmreg<>reg) then
           a_loadmm_reg_reg(list,fromsize,tosize,tmpmmreg,reg,shuffle);
+        maybe_check_for_fpu_exception(list);
       end;
 
 
@@ -3139,6 +3170,7 @@ unit cgcpu;
           begin
              handle_load_store(list,A_VSTR,PF_None,tmpmmreg,ref);
           end;
+        maybe_check_for_fpu_exception(list);
       end;
 
 
@@ -3154,6 +3186,7 @@ unit cgcpu;
            not shufflescalar(shuffle) then
           internalerror(2009112516);
         list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
+        maybe_check_for_fpu_exception(list);
       end;
 
 
@@ -3169,6 +3202,7 @@ unit cgcpu;
            not shufflescalar(shuffle) then
           internalerror(2009112514);
         list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
+        maybe_check_for_fpu_exception(list);
       end;
 
 
@@ -3287,6 +3321,7 @@ unit cgcpu;
         if (mmsize<>OS_F64) then
           internalerror(2009112405);
         list.concat(taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi));
+        cg.maybe_check_for_fpu_exception(list);
       end;
 
 
@@ -3297,6 +3332,7 @@ unit cgcpu;
         if (mmsize<>OS_F64) then
           internalerror(2009112406);
         list.concat(taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg));
+        cg.maybe_check_for_fpu_exception(list);
       end;
 
 
@@ -5103,6 +5139,7 @@ unit cgcpu;
             instr:=setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32);
             list.Concat(instr);
             add_move_instruction(instr);
+            maybe_check_for_fpu_exception(list);
           end
         else if (fromsize=OS_F64) and
           (tosize=OS_F64) then
@@ -5128,6 +5165,7 @@ unit cgcpu;
     procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
       begin
         handle_load_store(list,A_VSTR,PF_None,reg,ref);
+        maybe_check_for_fpu_exception(list);
       end;
 
 
@@ -5145,7 +5183,10 @@ unit cgcpu;
       begin
         if //(shuffle=nil) and
           (fromsize=OS_F32) then
-          list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg))
+          begin
+            list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
+            maybe_check_for_fpu_exception(list);
+          end
         else
           internalerror(2012100814);
       end;

+ 4 - 0
compiler/arm/narmadd.pas

@@ -238,6 +238,7 @@ interface
 
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(op,
                  location.register,left.location.register,right.location.register),pf));
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
             end;
           fpu_fpv4_s16:
             begin
@@ -263,6 +264,7 @@ interface
               end;
 
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(op, location.register,left.location.register,right.location.register), PF_F32));
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
             end;
           fpu_soft:
             { this case should be handled already by pass1 }
@@ -325,6 +327,7 @@ interface
 
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(op,
                 left.location.register,right.location.register), pf));
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
               cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VMRS,NR_APSR_nzcv,NR_FPSCR));
               location.resflags:=GetFpuResFlags;
@@ -341,6 +344,7 @@ interface
 
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(op,
                 left.location.register,right.location.register),PF_F32));
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
               cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
               current_asmdata.CurrAsmList.Concat(taicpu.op_reg_reg(A_VMRS, NR_APSR_nzcv, NR_FPSCR));
             end;

+ 16 - 3
compiler/arm/narminl.pas

@@ -272,9 +272,13 @@ implementation
               else
                 pf:=PF_F64;
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register),pf));
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
             end;
           fpu_fpv4_s16:
-            current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register), PF_F32));
+            begin
+              current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register), PF_F32));
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+            end;
           fpu_soft:
             begin
               if singleprec then
@@ -309,9 +313,13 @@ implementation
               else
                 pf:=PF_F64;
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register),pf));
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
             end;
           fpu_fpv4_s16:
-            current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register), PF_F32));
+            begin
+              current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register), PF_F32));
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+            end;
         else
           internalerror(2009111403);
         end;
@@ -339,9 +347,13 @@ implementation
               else
                 pf:=PF_F64;
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register),pf));
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
             end;
           fpu_fpv4_s16:
-            current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register), PF_F32));
+            begin
+              current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register), PF_F32));
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+            end;
         else
           internalerror(2009111402);
         end;
@@ -515,6 +527,7 @@ implementation
                oppostfix:=PF_F32;
              current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(op[negproduct,negop3],
                location.register,paraarray[1].location.register,paraarray[2].location.register),oppostfix));
+             cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
            end
          else
            internalerror(2014032301);

+ 2 - 0
compiler/arm/narmmat.pas

@@ -433,6 +433,7 @@ implementation
 
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VNEG,
                 location.register,left.location.register), pf));
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
             end;
           fpu_fpv4_s16:
             begin
@@ -442,6 +443,7 @@ implementation
                 location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VNEG,
                 location.register,left.location.register), PF_F32));
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
             end;
           fpu_soft:
             begin

+ 27 - 0
compiler/cgobj.pas

@@ -449,6 +449,15 @@ unit cgobj;
             generic version is suitable for 3-address CPUs }
           procedure g_div_const_reg_reg(list:tasmlist; size: TCgSize; a: tcgint; src,dst: tregister); virtual;
 
+          { some CPUs do not support hardware fpu exceptions, this procedure is called after instructions which
+            might set FPU exception related flags, so it has to check these flags if needed and throw an exeception }
+          procedure g_check_for_fpu_exception(list : TAsmList; force,clear : boolean); virtual;
+          procedure maybe_check_for_fpu_exception(list: TAsmList);
+
+          { some CPUs do not support hardware fpu exceptions, this procedure is called after instructions which
+            might set FPU exception related flags, so it has to check these flags if needed and throw an exeception }
+          procedure g_check_for_fpu_exception(list: TAsmList); virtual;
+
          protected
           function g_indirect_sym_load(list:TAsmList;const symname: string; const flags: tindsymflags): tregister;virtual;
        end;
@@ -2525,6 +2534,12 @@ implementation
 {$endif cpuflags}
 
 
+    procedure tcg.g_check_for_fpu_exception(list: TAsmList);
+      begin
+        { empty by default }
+      end;
+
+
 {*****************************************************************************
                             Entry/Exit Code Functions
 *****************************************************************************}
@@ -2888,6 +2903,18 @@ implementation
       end;
 
 
+    procedure tcg.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
+      begin
+        { empty by default }
+      end;
+
+
+    procedure tcg.maybe_check_for_fpu_exception(list: TAsmList);
+      begin
+        current_procinfo.FPUExceptionCheckNeeded:=true;
+        g_check_for_fpu_exception(list,false,true);
+      end;
+
 {*****************************************************************************
                                     TCG64
 *****************************************************************************}

+ 2 - 1
compiler/ncal.pas

@@ -56,7 +56,8 @@ interface
          cnf_call_never_returns, { information for the dfa that a subroutine never returns }
          cnf_call_self_node_done,{ the call_self_node has been generated if necessary
                                    (to prevent it from potentially happening again in a wrong context in case of constant propagation or so) }
-         cnf_ignore_visibility   { internally generated call that should ignore visibility checks }
+         cnf_ignore_visibility,  { internally generated call that should ignore visibility checks }
+         cnf_check_fpu_exceptions { after the call fpu exceptions shall be checked }
        );
        tcallnodeflags = set of tcallnodeflag;
 

+ 12 - 0
compiler/ncgcal.pas

@@ -1289,6 +1289,18 @@ implementation
          { release temps of paras }
          release_para_temps;
 
+         { check for fpu exceptions }
+         if cnf_check_fpu_exceptions in callnodeflags then
+           cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+
+         { check for fpu exceptions }
+         if cnf_check_fpu_exceptions in callnodeflags then
+           cg.g_check_for_fpu_exception(current_asmdata.CurrAsmList);
+
+         { check for fpu exceptions }
+         if cnf_check_fpu_exceptions in callnodeflags then
+           cg.g_check_for_fpu_exception(current_asmdata.CurrAsmList);
+
          { perhaps i/o check ? }
          if (cs_check_io in current_settings.localswitches) and
             (po_iocheck in procdefinition.procoptions) and

+ 19 - 8
compiler/ninl.pas

@@ -4065,7 +4065,7 @@ implementation
       begin
         { create the call to the helper }
         { on entry left node contains the parameter }
-        first_arctan_real := ccallnode.createintern('fpc_arctan_real',
+        result := ccallnode.createintern('fpc_arctan_real',
                 ccallparanode.create(left,nil));
         left := nil;
       end;
@@ -4074,8 +4074,9 @@ implementation
       begin
         { create the call to the helper }
         { on entry left node contains the parameter }
-        first_abs_real := ctypeconvnode.create(ccallnode.createintern('fpc_abs_real',
+        result := ctypeconvnode.create(ccallnode.createintern('fpc_abs_real',
                 ccallparanode.create(left,nil)),resultdef);
+        include(tcallnode(result).callnodeflags,cnf_check_fpu_exceptions);
         left := nil;
       end;
 
@@ -4088,8 +4089,9 @@ implementation
 {$endif cpufpemu}
         { create the call to the helper }
         { on entry left node contains the parameter }
-        first_sqr_real := ctypeconvnode.create(ccallnode.createintern('fpc_sqr_real',
+        result := ctypeconvnode.create(ccallnode.createintern('fpc_sqr_real',
                 ccallparanode.create(left,nil)),resultdef);
+        include(tcallnode(result).callnodeflags,cnf_check_fpu_exceptions);
         left := nil;
       end;
 
@@ -4121,15 +4123,16 @@ implementation
             else
               internalerror(2014052101);
             end;
-            first_sqrt_real:=ctypeconvnode.create_internal(ccallnode.createintern(procname,ccallparanode.create(
+            result:=ctypeconvnode.create_internal(ccallnode.createintern(procname,ccallparanode.create(
                ctypeconvnode.create_internal(left,fdef),nil)),resultdef);
           end
         else
           begin
             { create the call to the helper }
             { on entry left node contains the parameter }
-            first_sqrt_real := ctypeconvnode.create(ccallnode.createintern('fpc_sqrt_real',
+            result := ctypeconvnode.create(ccallnode.createintern('fpc_sqrt_real',
                 ccallparanode.create(left,nil)),resultdef);
+            include(tcallnode(result).callnodeflags,cnf_check_fpu_exceptions);
           end;
         left := nil;
       end;
@@ -4138,8 +4141,9 @@ implementation
       begin
         { create the call to the helper }
         { on entry left node contains the parameter }
-        first_ln_real := ccallnode.createintern('fpc_ln_real',
+        result := ccallnode.createintern('fpc_ln_real',
                 ccallparanode.create(left,nil));
+        include(tcallnode(result).callnodeflags,cnf_check_fpu_exceptions);
         left := nil;
       end;
 
@@ -4147,8 +4151,9 @@ implementation
       begin
         { create the call to the helper }
         { on entry left node contains the parameter }
-        first_cos_real := ccallnode.createintern('fpc_cos_real',
+        result := ccallnode.createintern('fpc_cos_real',
                 ccallparanode.create(left,nil));
+        include(tcallnode(result).callnodeflags,cnf_check_fpu_exceptions);
         left := nil;
       end;
 
@@ -4156,8 +4161,9 @@ implementation
       begin
         { create the call to the helper }
         { on entry left node contains the parameter }
-        first_sin_real := ccallnode.createintern('fpc_sin_real',
+        result := ccallnode.createintern('fpc_sin_real',
                 ccallparanode.create(left,nil));
+        include(tcallnode(result).callnodeflags,cnf_check_fpu_exceptions);
         left := nil;
       end;
 
@@ -4166,6 +4172,7 @@ implementation
         { create the call to the helper }
         { on entry left node contains the parameter }
         result := ccallnode.createintern('fpc_exp_real',ccallparanode.create(left,nil));
+        include(tcallnode(result).callnodeflags,cnf_check_fpu_exceptions);
         left := nil;
       end;
 
@@ -4174,6 +4181,7 @@ implementation
         { create the call to the helper }
         { on entry left node contains the parameter }
         result := ccallnode.createintern('fpc_int_real',ccallparanode.create(left,nil));
+        include(tcallnode(result).callnodeflags,cnf_check_fpu_exceptions);
         left := nil;
       end;
 
@@ -4182,6 +4190,7 @@ implementation
         { create the call to the helper }
         { on entry left node contains the parameter }
         result := ccallnode.createintern('fpc_frac_real',ccallparanode.create(left,nil));
+        include(tcallnode(result).callnodeflags,cnf_check_fpu_exceptions);
         left := nil;
       end;
 
@@ -4190,6 +4199,7 @@ implementation
         { create the call to the helper }
         { on entry left node contains the parameter }
         result := ccallnode.createintern('fpc_round_real',ccallparanode.create(left,nil));
+        include(tcallnode(result).callnodeflags,cnf_check_fpu_exceptions);
         left := nil;
       end;
 
@@ -4198,6 +4208,7 @@ implementation
         { create the call to the helper }
         { on entry left node contains the parameter }
         result := ccallnode.createintern('fpc_trunc_real',ccallparanode.create(left,nil));
+        include(tcallnode(result).callnodeflags,cnf_check_fpu_exceptions);
         left := nil;
       end;
 

+ 4 - 0
compiler/procinfo.pas

@@ -134,6 +134,10 @@ unit procinfo;
             Requires different entry code for some targets. }
           ConstructorCallingConstructor: boolean;
 
+          { true, if an FPU instruction has been generated which could raise an exception and where the flags
+            need to be checked explicitly like on RISC-V or certain ARM architectures }
+          FPUExceptionCheckNeeded : Boolean;
+
           constructor create(aparent:tprocinfo);virtual;
           destructor destroy;override;
 

+ 67 - 1
rtl/aarch64/aarch64.inc

@@ -57,14 +57,80 @@ procedure setfpsr(val: dword); nostackframe; assembler;
   end;
 
 
+const
+  FPSR_IOC = 1;
+  FPSR_DZC = 1 shl 1;
+  FPSR_OFC = 1 shl 2;
+  FPSR_UFC = 1 shl 3;
+  FPSR_IXC = 1 shl 4;
+  FPSR_IDC = 1 shl 7;
+  FPSR_EXCEPTIONS = FPSR_IOC or FPSR_DZC or FPSR_OFC or FPSR_UFC or FPSR_IXC or FPSR_IDC;
+
+
+procedure RaisePendingExceptions;
+  var
+    fpsr : dword;
+    f: TFPUException;
+  begin
+    fpsr:=getfpsr;
+    if (fpsr and FPSR_DZC) <> 0 then
+      float_raise(exZeroDivide);
+    if (fpsr and FPSR_OFC) <> 0 then
+      float_raise(exOverflow);
+    if (fpsr and FPSR_UFC) <> 0 then
+      float_raise(exUnderflow);
+    if (fpsr and FPSR_IOC) <> 0 then
+      float_raise(exInvalidOp);
+    if (fpsr and FPSR_IXC) <> 0 then
+      float_raise(exPrecision);
+    if (fpsr and FPSR_IDC) <> 0 then
+      float_raise(exDenormalized);
+    { now the soft float exceptions }
+    for f in softfloat_exception_flags do
+      float_raise(f);
+  end;
+
+
+{ as so far no AArch64 flavour which supports hard floating point exceptions, we use solely
+  the softfloat_exception_mask for masking as the masking flags are RAZ and WI if floating point
+  exceptions are not supported }
+procedure fpc_throwfpuexception;[public,alias:'FPC_THROWFPUEXCEPTION'];
+  var
+    fpsr : dword;
+    f: TFPUException;
+  begin
+    { at this point, we know already, that an exception will be risen }
+    fpsr:=getfpsr;
+
+    { check, if the exception is masked }
+    if ((fpsr and FPSR_DZC) <> 0) and (exZeroDivide in softfloat_exception_mask) then
+      fpsr:=fpsr and not(FPSR_DZC);
+    if ((fpsr and FPSR_OFC) <> 0) and (exOverflow in softfloat_exception_mask) then
+      fpsr:=fpsr and not(FPSR_OFC);
+    if ((fpsr and FPSR_UFC) <> 0) and (exUnderflow in softfloat_exception_mask) then
+      fpsr:=fpsr and not(FPSR_UFC);
+    if ((fpsr and FPSR_IOC) <> 0) and (exInvalidOp in softfloat_exception_mask) then
+      fpsr:=fpsr and not(FPSR_IOC);
+    if ((fpsr and FPSR_IXC) <> 0) and (exPrecision in softfloat_exception_mask) then
+      fpsr:=fpsr and not(FPSR_IXC);
+    if ((fpsr and FPSR_IDC) <> 0) and (exDenormalized in softfloat_exception_mask) then
+      fpsr:=fpsr and not(FPSR_IDC);
+    setfpsr(fpsr);
+    if (fpsr and FPSR_EXCEPTIONS)<>0 then
+      RaisePendingExceptions;
+  end;
+
+
 procedure fpc_enable_fpu_exceptions;
   begin
     { clear all "exception happened" flags we care about}
     setfpsr(getfpsr and not(fpu_exception_mask shr fpu_exception_mask_to_status_mask_shift));
     { enable invalid operations and division by zero exceptions. }
-    setfpcr(getfpcr or fpu_exception_mask);
+    setfpcr((getfpcr and not(fpu_exception_mask)));
+    softfloat_exception_mask:=[exPrecision,exUnderflow,exInvalidOp];
   end;
 
+
 procedure fpc_cpuinit;
   begin
     { don't let libraries influence the FPU cw set by the host program }

+ 1 - 1
rtl/aarch64/math.inc

@@ -1,5 +1,5 @@
 {
-    Implementation of mathematical routines for x86_64
+    Implementation of mathematical routines for AArch64
 
     This file is part of the Free Pascal run time library.
     Copyright (c) 1999-2005 by the Free Pascal development team

+ 18 - 3
rtl/aarch64/mathu.inc

@@ -80,9 +80,14 @@ const
 
 
 function GetExceptionMask: TFPUExceptionMask;
+  {
   var
     fpcr: dword;
+  }
   begin
+    { as I am not aware of any hardware exception supporting AArch64 implementation,
+      and else the trapping enable flags are RAZ, return the softfloat exception mask (FK)
+
     fpcr:=getfpcr;
     result:=[];
     if ((fpcr and fpu_ioe)=0) then
@@ -97,14 +102,22 @@ function GetExceptionMask: TFPUExceptionMask;
       result := result+[exPrecision];
     if ((fpcr and fpu_ide)=0) then
       result := result+[exDenormalized];
+    }
+    result:=softfloat_exception_mask;
   end;
 
 
 function SetExceptionMask(const Mask: TFPUExceptionMask): TFPUExceptionMask;
+  {
   var
     newfpcr: dword;
+  }
   begin
+    { as I am not aware of any hardware exception supporting AArch64 implementation,
+      and else the trapping enable flags are RAZ, work solely with softfloat_exception_mask (FK)
+    }
     softfloat_exception_mask:=mask;
+    {
     newfpcr:=fpu_exception_mask;
     if exInvalidOp in Mask then
       newfpcr:=newfpcr and not(fpu_ioe);
@@ -118,13 +131,15 @@ function SetExceptionMask(const Mask: TFPUExceptionMask): TFPUExceptionMask;
       newfpcr:=newfpcr and not(fpu_ixe);
     if exDenormalized in Mask then
       newfpcr:=newfpcr and not(fpu_ide);
+    }
     { clear "exception happened" flags }
     ClearExceptions(false);
     { set new exception mask }
-    setfpcr((getfpcr and not(fpu_exception_mask)) or newfpcr);
+//    setfpcr((getfpcr and not(fpu_exception_mask)) or newfpcr);
     { unsupported mask bits will remain 0 -> read exception mask again }
-    result:=GetExceptionMask;
-    softfloat_exception_mask:=result;
+//    result:=GetExceptionMask;
+//    softfloat_exception_mask:=result;
+    result:=softfloat_exception_mask;
   end;
 
 

+ 57 - 0
rtl/arm/arm.inc

@@ -47,6 +47,63 @@ begin
   end;
 end;
 {$else}
+
+
+const
+  fpu_nx = 1 shl 0;
+  fpu_uf = 1 shl 1;
+  fpu_of = 1 shl 2;
+  fpu_dz = 1 shl 3;
+  fpu_nv = 1 shl 4;
+
+function getfpscr: sizeuint; nostackframe; assembler;
+  asm
+    fmrx r0,fpscr
+  end;
+
+
+procedure setfpscr(flags : sizeuint); nostackframe; assembler;
+  asm
+    fmxr fpscr,r0
+  end;
+
+
+const
+  FPSCR_IOC = 1;
+  FPSCR_DZC = 1 shl 1;
+  FPSCR_OFC = 1 shl 2;
+  FPSCR_UFC = 1 shl 3;
+  FPSCR_IXC = 1 shl 4;
+  FPSCR_IDC = 1 shl 7;
+
+
+procedure fpc_throwfpuexception;[public,alias:'FPC_THROWFPUEXCEPTION'];
+  var
+    fpscr : longint;
+    f: TFPUException;
+  begin
+    { at this point, we know already, that an exception will be risen }
+    fpscr:=getfpscr;
+
+    if (fpscr and FPSCR_DZC) <> 0 then
+      float_raise(exZeroDivide);
+    if (fpscr and FPSCR_OFC) <> 0 then
+      float_raise(exOverflow);
+    if (fpscr and FPSCR_UFC) <> 0 then
+      float_raise(exUnderflow);
+    if (fpscr and FPSCR_IOC) <> 0 then
+      float_raise(exInvalidOp);
+    if (fpscr and FPSCR_IXC) <> 0 then
+      float_raise(exPrecision);
+    if (fpscr and FPSCR_IDC) <> 0 then
+      float_raise(exDenormalized);
+
+    { now the soft float exceptions }
+    for f in softfloat_exception_flags do
+      float_raise(f);
+  end;
+
+
 Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
 begin
   { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }