Browse Source

+ implement compiler support for SEH on Win64

Note: due to the way we access variables in a nested function (which in this case includes exception filters), we cannot extract the finally handlers and call them (like we do on i386 and x86_64); instead, we duplicate the finally code.

git-svn-id: trunk@44941 -
svenbarth 5 years ago
parent
commit
2b59000d56

+ 1 - 0
.gitattributes

@@ -30,6 +30,7 @@ compiler/aarch64/itcpugas.pas svneol=native#text/plain
 compiler/aarch64/ncpuadd.pas svneol=native#text/plain
 compiler/aarch64/ncpucnv.pas svneol=native#text/plain
 compiler/aarch64/ncpucon.pas svneol=native#text/pascal
+compiler/aarch64/ncpuflw.pas svneol=native#text/pascal
 compiler/aarch64/ncpuinl.pas svneol=native#text/plain
 compiler/aarch64/ncpumat.pas svneol=native#text/plain
 compiler/aarch64/ncpumem.pas svneol=native#text/plain

+ 236 - 87
compiler/aarch64/cgcpu.pas

@@ -124,10 +124,11 @@ interface
 implementation
 
   uses
-    globals,verbose,systems,cutils,
+    globals,verbose,systems,cutils,cclasses,
     paramgr,fmodule,
     symtable,symsym,
     tgobj,
+    ncgutil,
     procinfo,cpupi;
 
 
@@ -1596,27 +1597,74 @@ implementation
         ref: treference;
         sr: tsuperregister;
         pairreg: tregister;
+        sehreg,sehregp : TAsmSehDirective;
       begin
         result:=0;
         reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
         ref.addressmode:=AM_PREINDEXED;
         pairreg:=NR_NO;
-        { store all used registers pairwise }
-        for sr:=lowsr to highsr do
-          if sr in rg[rt].used_in_proc then
-            if pairreg=NR_NO then
-              pairreg:=newreg(rt,sr,sub)
+        { for SEH on Win64 we can only store consecutive register pairs, others
+          need to be stored with STR }
+        if target_info.system=system_aarch64_win64 then
+          begin
+            if rt=R_INTREGISTER then
+              begin
+                sehreg:=ash_savereg_x;
+                sehregp:=ash_saveregp_x;
+              end
+            else if rt=R_MMREGISTER then
+              begin
+                sehreg:=ash_savefreg_x;
+                sehregp:=ash_savefregp_x;
+              end
             else
+              internalerror(2020041304);
+            for sr:=lowsr to highsr do
+              if sr in rg[rt].used_in_proc then
+                if pairreg=NR_NO then
+                  pairreg:=newreg(rt,sr,sub)
+                else
+                  begin
+                    inc(result,16);
+                    if getsupreg(pairreg)=sr-1 then
+                      begin
+                        list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,newreg(rt,sr,sub),ref));
+                        list.concat(cai_seh_directive.create_reg_offset(sehregp,pairreg,16));
+                        pairreg:=NR_NO;
+                      end
+                    else
+                      begin
+                        list.concat(taicpu.op_reg_ref(A_STR,pairreg,ref));
+                        list.concat(cai_seh_directive.create_reg_offset(sehreg,pairreg,16));
+                        pairreg:=newreg(rt,sr,sub);
+                      end;
+                  end;
+            if pairreg<>NR_NO then
               begin
                 inc(result,16);
-                list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,newreg(rt,sr,sub),ref));
-                pairreg:=NR_NO
+                list.concat(taicpu.op_reg_ref(A_STR,pairreg,ref));
+                list.concat(cai_seh_directive.create_reg_offset(sehreg,pairreg,16));
               end;
-        { one left -> store twice (stack must be 16 bytes aligned) }
-        if pairreg<>NR_NO then
+          end
+        else
           begin
-            list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,pairreg,ref));
-            inc(result,16);
+            { store all used registers pairwise }
+            for sr:=lowsr to highsr do
+              if sr in rg[rt].used_in_proc then
+                if pairreg=NR_NO then
+                  pairreg:=newreg(rt,sr,sub)
+                else
+                  begin
+                    inc(result,16);
+                    list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,newreg(rt,sr,sub),ref));
+                    pairreg:=NR_NO
+                  end;
+            { one left -> store twice (stack must be 16 bytes aligned) }
+            if pairreg<>NR_NO then
+              begin
+                list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,pairreg,ref));
+                inc(result,16);
+              end;
           end;
       end;
 
@@ -1637,69 +1685,124 @@ implementation
 
     procedure tcgaarch64.g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);
       var
+        hitem: tlinkedlistitem;
+        seh_proc: tai_seh_directive;
+        templist: TAsmList;
+        suppress_endprologue: boolean;
         ref: treference;
         totalstackframesize: longint;
       begin
-        if nostackframe then
-          exit;
-        { stack pointer has to be aligned to 16 bytes at all times }
-        localsize:=align(localsize,16);
+        hitem:=list.last;
+        { pi_has_unwind_info may already be set at this point if there are
+          SEH directives in assembler body. In this case, .seh_endprologue
+          is expected to be one of those directives, and not generated here. }
+        suppress_endprologue:=(pi_has_unwind_info in current_procinfo.flags);
 
-        { save stack pointer and return address }
-        reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
-        ref.addressmode:=AM_PREINDEXED;
-        list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
-        { initialise frame pointer }
-        a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
-
-        totalstackframesize:=localsize;
-        { save modified integer registers }
-        inc(totalstackframesize,
-          save_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE));
-        { only the lower 64 bits of the modified vector registers need to be
-          saved; if the caller needs the upper 64 bits, it has to save them
-          itself }
-        inc(totalstackframesize,
-          save_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD));
-
-        { allocate stack space }
-        if localsize<>0 then
+        if not nostackframe then
           begin
+            { stack pointer has to be aligned to 16 bytes at all times }
             localsize:=align(localsize,16);
-            current_procinfo.final_localsize:=localsize;
-            handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize,NR_SP,NR_IP0,false,true);
+
+            if target_info.system=system_aarch64_win64 then
+              include(current_procinfo.flags,pi_has_unwind_info);
+
+            { save stack pointer and return address }
+            reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
+            ref.addressmode:=AM_PREINDEXED;
+            list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
+            if target_info.system=system_aarch64_win64 then
+              list.concat(cai_seh_directive.create_offset(ash_savefplr_x,16));
+            { initialise frame pointer }
+            if current_procinfo.procdef.proctypeoption<>potype_exceptfilter then
+              begin
+                a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
+                if target_info.system=system_aarch64_win64 then
+                  list.concat(cai_seh_directive.create(ash_setfp));
+              end
+            else
+              begin
+                gen_load_frame_for_exceptfilter(list);
+                localsize:=current_procinfo.maxpushedparasize;
+              end;
+
+            totalstackframesize:=localsize;
+            { save modified integer registers }
+            inc(totalstackframesize,
+              save_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE));
+            { only the lower 64 bits of the modified vector registers need to be
+              saved; if the caller needs the upper 64 bits, it has to save them
+              itself }
+            inc(totalstackframesize,
+              save_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD));
+
+            { allocate stack space }
+            if localsize<>0 then
+              begin
+                localsize:=align(localsize,16);
+                current_procinfo.final_localsize:=localsize;
+                handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize,NR_SP,NR_IP0,false,true);
+                if target_info.system=system_aarch64_win64 then
+                  list.concat(cai_seh_directive.create_offset(ash_stackalloc,localsize));
+              end;
+            { By default, we use the frame pointer to access parameters passed via
+              the stack and the stack pointer to address local variables and temps
+              because
+               a) we can use bigger positive than negative offsets (so accessing
+                  locals via negative offsets from the frame pointer would be less
+                  efficient)
+               b) we don't know the local size while generating the code, so
+                  accessing the parameters via the stack pointer is not possible
+                  without copying them
+              The problem with this is the get_frame() intrinsic:
+               a) it must return the same value as what we pass as parentfp
+                  parameter, since that's how it's used in the TP-style objects unit
+               b) its return value must be usable to access all local data from a
+                  routine (locals and parameters), since it's all the nested
+                  routines have access to
+               c) its return value must be usable to construct a backtrace, as it's
+                  also used by the exception handling routines
+
+              The solution we use here, based on something similar that's done in
+              the MIPS port, is to generate all accesses to locals in the routine
+              itself SP-relative, and then after the code is generated and the local
+              size is known (namely, here), we change all SP-relative variables/
+              parameters into FP-relative ones. This means that they'll be accessed
+              less efficiently from nested routines, but those accesses are indirect
+              anyway and at least this way they can be accessed at all
+            }
+            if current_procinfo.has_nestedprocs or
+               (
+                 (target_info.system=system_aarch64_win64) and
+                 (current_procinfo.flags*[pi_has_implicit_finally,pi_needs_implicit_finally,pi_uses_exceptions]<>[])
+               ) then
+              begin
+                current_procinfo.procdef.localst.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
+                current_procinfo.procdef.parast.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
+              end;
           end;
-        { By default, we use the frame pointer to access parameters passed via
-          the stack and the stack pointer to address local variables and temps
-          because
-           a) we can use bigger positive than negative offsets (so accessing
-              locals via negative offsets from the frame pointer would be less
-              efficient)
-           b) we don't know the local size while generating the code, so
-              accessing the parameters via the stack pointer is not possible
-              without copying them
-          The problem with this is the get_frame() intrinsic:
-           a) it must return the same value as what we pass as parentfp
-              parameter, since that's how it's used in the TP-style objects unit
-           b) its return value must usable to access all local data from a
-              routine (locals and parameters), since it's all the nested
-              routines have access to
-           c) its return value must be usable to construct a backtrace, as it's
-              also used by the exception handling routines
-
-          The solution we use here, based on something similar that's done in
-          the MIPS port, is to generate all accesses to locals in the routine
-          itself SP-relative, and then after the code is generated and the local
-          size is known (namely, here), we change all SP-relative variables/
-          parameters into FP-relative ones. This means that they'll be accessed
-          less efficiently from nested routines, but those accesses are indirect
-          anyway and at least this way they can be accessed at all
-        }
-        if current_procinfo.has_nestedprocs then
+
+        if not (pi_has_unwind_info in current_procinfo.flags) then
+          exit;
+
+        { Generate unwind data for aarch64-win64 }
+        seh_proc:=cai_seh_directive.create_name(ash_proc,current_procinfo.procdef.mangledname);
+        if assigned(hitem) then
+          list.insertafter(seh_proc,hitem)
+        else
+          list.insert(seh_proc);
+        { the directive creates another section }
+        inc(list.section_count);
+        templist:=TAsmList.Create;
+
+        if not suppress_endprologue then
           begin
-            current_procinfo.procdef.localst.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
-            current_procinfo.procdef.parast.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
+            templist.concat(cai_seh_directive.create(ash_endprologue));
           end;
+        if assigned(current_procinfo.endprologue_ai) then
+          current_procinfo.aktproccode.insertlistafter(current_procinfo.endprologue_ai,templist)
+        else
+          list.concatlist(templist);
+        templist.free;
       end;
 
 
@@ -1720,35 +1823,76 @@ implementation
         ref: treference;
         sr, highestsetsr: tsuperregister;
         pairreg: tregister;
+        i,
         regcount: longint;
+        aiarr : array of tai;
       begin
         reference_reset_base(ref,NR_SP,16,ctempposinvalid,16,[]);
         ref.addressmode:=AM_POSTINDEXED;
-        { highest reg stored twice? }
         regcount:=0;
-        highestsetsr:=RS_NO;
-        for sr:=lowsr to highsr do
-          if sr in rg[rt].used_in_proc then
-            begin
-              inc(regcount);
-              highestsetsr:=sr;
-            end;
-        if odd(regcount) then
+        { due to SEH on Win64 we can only load consecutive registers and single
+          ones are done using LDR, so we need to handle this differently there }
+        if target_info.system=system_aarch64_win64 then
           begin
-            list.concat(taicpu.op_reg_ref(A_LDR,newreg(rt,highestsetsr,sub),ref));
-            highestsetsr:=pred(highestsetsr);
-          end;
-        { load all (other) used registers pairwise }
-        pairreg:=NR_NO;
-        for sr:=highestsetsr downto lowsr do
-          if sr in rg[rt].used_in_proc then
-            if pairreg=NR_NO then
-              pairreg:=newreg(rt,sr,sub)
-            else
+            setlength(aiarr,highsr-lowsr+1);
+            pairreg:=NR_NO;
+            for sr:=lowsr to highsr do
+              if sr in rg[rt].used_in_proc then
+                begin
+                  if pairreg=NR_NO then
+                    pairreg:=newreg(rt,sr,sub)
+                  else
+                    begin
+                      if getsupreg(pairreg)=sr-1 then
+                        begin
+                          aiarr[regcount]:=taicpu.op_reg_reg_ref(A_LDP,pairreg,newreg(rt,sr,sub),ref);
+                          inc(regcount);
+                          pairreg:=NR_NO;
+                        end
+                      else
+                        begin
+                          aiarr[regcount]:=taicpu.op_reg_ref(A_LDR,pairreg,ref);
+                          inc(regcount);
+                          pairreg:=newreg(rt,sr,sub);
+                        end;
+                    end;
+                end;
+            if pairreg<>NR_NO then
+              begin
+                aiarr[regcount]:=taicpu.op_reg_ref(A_LDR,pairreg,ref);
+                inc(regcount);
+                pairreg:=NR_NO;
+              end;
+            for i:=regcount-1 downto 0 do
+              list.concat(aiarr[i]);
+          end
+        else
+          begin
+            { highest reg stored twice? }
+            highestsetsr:=RS_NO;
+            for sr:=lowsr to highsr do
+              if sr in rg[rt].used_in_proc then
+                begin
+                  inc(regcount);
+                  highestsetsr:=sr;
+                end;
+            if odd(regcount) then
               begin
-                list.concat(taicpu.op_reg_reg_ref(A_LDP,newreg(rt,sr,sub),pairreg,ref));
-                pairreg:=NR_NO
+                list.concat(taicpu.op_reg_ref(A_LDR,newreg(rt,highestsetsr,sub),ref));
+                highestsetsr:=pred(highestsetsr);
               end;
+            { load all (other) used registers pairwise }
+            pairreg:=NR_NO;
+            for sr:=highestsetsr downto lowsr do
+              if sr in rg[rt].used_in_proc then
+                if pairreg=NR_NO then
+                  pairreg:=newreg(rt,sr,sub)
+                else
+                  begin
+                    list.concat(taicpu.op_reg_reg_ref(A_LDP,newreg(rt,sr,sub),pairreg,ref));
+                    pairreg:=NR_NO
+                  end;
+          end;
         { There can't be any register left }
         if pairreg<>NR_NO then
           internalerror(2014112602);
@@ -1807,6 +1951,11 @@ implementation
 
         { return }
         list.concat(taicpu.op_none(A_RET));
+        if (pi_has_unwind_info in current_procinfo.flags) then
+          begin
+            tcpuprocinfo(current_procinfo).dump_scopes(list);
+            list.concat(cai_seh_directive.create(ash_endproc));
+          end;
       end;
 
 

+ 1 - 1
compiler/aarch64/cpunode.pas

@@ -35,7 +35,7 @@ implementation
     symcpu,
     aasmdef,
 {$ifndef llvm}
-    ncpuadd,ncpumat,ncpumem,ncpuinl,ncpucnv,ncpuset,ncpucon
+    ncpuadd,ncpumat,ncpumem,ncpuinl,ncpucnv,ncpuset,ncpucon,ncpuflw
 {$else llvm}
     llvmnode
 {$endif llvm}

+ 81 - 2
compiler/aarch64/cpupi.pas

@@ -27,19 +27,38 @@ interface
 
   uses
     procinfo,
-    psub;
+    psub,
+    aasmdata,aasmbase;
 
   type
     tcpuprocinfo=class(tcgprocinfo)
+    private
+      scopes: TAsmList;
+      scopecount: longint;
+      unwindflags: byte;
+    public
       constructor create(aparent: tprocinfo); override;
+      destructor destroy; override;
       procedure set_first_temp_offset; override;
+      procedure add_finally_scope(startlabel,endlabel,handler:TAsmSymbol;implicit:Boolean);
+      procedure add_except_scope(trylabel,exceptlabel,endlabel,filter:TAsmSymbol);
+      procedure dump_scopes(list:tasmlist);
     end;
 
 implementation
 
   uses
+    cutils,
+    fmodule,
+    symtable,
     tgobj,
-    cpubase;
+    cpubase,
+    aasmtai;
+
+  const
+    SCOPE_FINALLY=0;
+    SCOPE_CATCHALL=1;
+    SCOPE_IMPLICIT=2;
 
   constructor tcpuprocinfo.create(aparent: tprocinfo);
     begin
@@ -56,12 +75,72 @@ implementation
       framepointer:=NR_STACK_POINTER_REG;
     end;
 
+  destructor tcpuprocinfo.destroy; { Releases the list of SEH scope records
+      and then runs the inherited destructor. The list is only allocated on
+      demand by add_finally_scope/add_except_scope, so it may still be nil
+      when this runs. }
+    begin
+      scopes.free;
+      inherited destroy;
+    end;
+
   procedure tcpuprocinfo.set_first_temp_offset;
     begin
      { leave room for allocated parameters }
      tg.setfirsttemp(align(maxpushedparasize,16));
     end;
 
+  procedure tcpuprocinfo.add_finally_scope(startlabel,endlabel,handler:TAsmSymbol;implicit:Boolean); { Records
+      one try..finally scope for later emission by dump_scopes.
+      startlabel/endlabel delimit the protected code range and handler is
+      the symbol of the finalizer routine. implicit selects the
+      SCOPE_IMPLICIT record kind instead of SCOPE_FINALLY and additionally
+      sets bit 0 of unwindflags. The record appended to scopes consists of
+      four entries: a 32-bit kind followed by the RVAs of startlabel,
+      endlabel and handler. }
+    begin
+      unwindflags:=unwindflags or 2; { NOTE(review): presumably the "unwind handler present" flag bit - confirm against the handler flag encoding }
+      if implicit then  { also needs catch functionality }
+        unwindflags:=unwindflags or 1;
+      inc(scopecount);
+      if scopes=nil then { the list is created lazily on the first scope }
+        scopes:=TAsmList.Create;
+
+      if implicit then
+        scopes.concat(tai_const.create_32bit(SCOPE_IMPLICIT))
+      else
+        scopes.concat(tai_const.create_32bit(SCOPE_FINALLY));
+      scopes.concat(tai_const.create_rva_sym(startlabel));
+      scopes.concat(tai_const.create_rva_sym(endlabel));
+      scopes.concat(tai_const.create_rva_sym(handler));
+    end;
+
+  procedure tcpuprocinfo.add_except_scope(trylabel,exceptlabel,endlabel,filter:TAsmSymbol); { Records
+      one try..except scope for later emission by dump_scopes. The record
+      appended to scopes consists of four entries: the RVA of filter when
+      one is given, otherwise the 32-bit constant SCOPE_CATCHALL, followed
+      by the RVAs of trylabel, exceptlabel and endlabel. Both low bits of
+      unwindflags are set for this kind of scope. }
+    begin
+      unwindflags:=unwindflags or 3;
+      inc(scopecount);
+      if scopes=nil then { the list is created lazily on the first scope }
+        scopes:=TAsmList.Create;
+
+      if Assigned(filter) then
+        scopes.concat(tai_const.create_rva_sym(filter))
+      else
+        scopes.concat(tai_const.create_32bit(SCOPE_CATCHALL));
+      scopes.concat(tai_const.create_rva_sym(trylabel));
+      scopes.concat(tai_const.create_rva_sym(exceptlabel));
+      scopes.concat(tai_const.create_rva_sym(endlabel));
+    end;
+
+  procedure tcpuprocinfo.dump_scopes(list: tasmlist); { Emits the handler
+      directives and the recorded scope table into list. Does nothing when
+      no scope was recorded. Otherwise the output is, in order: a handler
+      directive naming '__FPC_specific_handler' whose flags are the
+      accumulated unwindflags, a handlerdata directive, a 32-bit scope
+      count, the scope records collected by add_finally_scope and
+      add_except_scope, and finally a switch back to the code section of
+      this procedure. }
+    var
+      hdir: tai_seh_directive;
+    begin
+      if (scopecount=0) then
+        exit;
+      hdir:=cai_seh_directive.create_name(ash_handler,'__FPC_specific_handler');
+      if not systemunit.iscurrentunit then { outside the system unit the handler is registered as an external function symbol }
+        current_module.add_extern_asmsym('__FPC_specific_handler',AB_EXTERNAL,AT_FUNCTION);
+      hdir.data.flags:=unwindflags;
+      list.concat(hdir);
+      list.concat(cai_seh_directive.create(ash_handlerdata));
+      inc(list.section_count); { NOTE(review): presumably accounts for the section opened by the handlerdata directive - confirm }
+      list.concat(tai_const.create_32bit(scopecount));
+      list.concatlist(scopes);
+      { return to text, required for GAS compatibility }
+      { This creates a tai_align which is redundant here (although harmless) }
+      new_section(list,sec_code,lower(procdef.mangledname),0);
+    end;
+
 
 begin
   cprocinfo:=tcpuprocinfo;

+ 543 - 0
compiler/aarch64/ncpuflw.pas

@@ -0,0 +1,543 @@
+{
+    Copyright (c) 2011-2020 by Free Pascal development team
+
+    Generate Win64-specific exception handling code (based on x86_64 code)
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpuflw;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    node,nflw,ncgflw,psub;
+
+  type
+    taarch64raisenode=class(tcgraisenode)
+      function pass_1 : tnode;override;
+    end;
+
+    taarch64onnode=class(tcgonnode)
+      procedure pass_generate_code;override;
+    end;
+
+    taarch64tryexceptnode=class(tcgtryexceptnode)
+      procedure pass_generate_code;override;
+    end;
+
+    taarch64tryfinallynode=class(tcgtryfinallynode)
+      finalizepi: tcgprocinfo;
+      constructor create(l,r:TNode);override;
+      constructor create_implicit(l,r:TNode);override;
+      function simplify(forinline: boolean): tnode;override;
+      procedure pass_generate_code;override;
+    end;
+
+implementation
+
+  uses
+    globtype,globals,verbose,systems,fmodule,
+    nbas,ncal,nutils,
+    symconst,symsym,symdef,
+    cgbase,cgobj,cgutils,tgobj,
+    cpubase,htypechk,
+    pass_1,pass_2,
+    aasmbase,aasmtai,aasmdata,aasmcpu,procinfo,cpupi;
+
+  var
+    endexceptlabel: tasmlabel;
+
+
+{ taarch64raisenode }
+
+function taarch64raisenode.pass_1 : tnode; { First pass for raise nodes.
+    On targets other than aarch64-win64, or when left is assigned, the
+    inherited implementation is used. Otherwise (the re-raise case referred
+    to by the comment below) the node is replaced by a statement block
+    containing a single call to fpc_reraise that is flagged as never
+    returning. }
+  var
+    statements : tstatementnode;
+    raisenode : tcallnode;
+  begin
+    { difference from generic code is that address stack is not popped on reraise }
+    if (target_info.system<>system_aarch64_win64) or assigned(left) then
+      result:=inherited pass_1
+    else
+      begin
+        result:=internalstatements(statements);
+        raisenode:=ccallnode.createintern('fpc_reraise',nil);
+        include(raisenode.callnodeflags,cnf_call_never_returns);
+        addstatement(statements,raisenode);
+      end;
+end;
+
+{ taarch64onnode }
+
+procedure taarch64onnode.pass_generate_code; { Generates the code of one
+    "on" handler. Targets other than aarch64-win64 use the inherited
+    implementation. For aarch64-win64 the exception object arriving in the
+    function result register is stored into a freshly allocated local for
+    the exception variable (if the handler declares one), the handler body
+    in right is generated, the local is released again, and the code ends
+    with a call to FPC_DONEEXCEPTION and a jump to the unit-level
+    endexceptlabel. }
+  var
+    exceptvarsym : tlocalvarsym;
+  begin
+    if (target_info.system<>system_aarch64_win64) then
+      begin
+        inherited pass_generate_code;
+        exit;
+      end;
+
+    location_reset(location,LOC_VOID,OS_NO);
+
+    { RTL will put exceptobject into X0 when jumping here }
+    cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
+
+    { Retrieve exception variable; it is taken from the first entry of the
+      handler's symtable, and there is none if no symtable is attached }
+    if assigned(excepTSymtable) then
+      exceptvarsym:=tlocalvarsym(excepTSymtable.SymList[0])
+    else
+      exceptvarsym:=nil;
+
+    if assigned(exceptvarsym) then
+      begin
+        exceptvarsym.localloc.loc:=LOC_REFERENCE;
+        exceptvarsym.localloc.size:=OS_ADDR;
+        tg.GetLocal(current_asmdata.CurrAsmList,sizeof(pint),voidpointertype,exceptvarsym.localloc.reference);
+        cg.a_load_reg_ref(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,NR_FUNCTION_RESULT_REG,exceptvarsym.localloc.reference);
+      end;
+    cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
+
+    if assigned(right) then
+      secondpass(right);
+
+    { deallocate exception symbol }
+    if assigned(exceptvarsym) then
+      begin
+        tg.UngetLocal(current_asmdata.CurrAsmList,exceptvarsym.localloc.reference);
+        exceptvarsym.localloc.loc:=LOC_INVALID;
+      end;
+    cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
+    cg.a_jmp_always(current_asmdata.CurrAsmList,endexceptlabel);
+  end;
+
+{ taarch64tryfinallynode }
+
+function reset_regvars(var n: tnode; arg: pointer): foreachnoderesult; { Node
+    tree callback used via foreachnodestatic. Temp reference nodes are
+    marked as not regable, and for every call node pi_do_call is added to
+    the flags of the procinfo passed in arg (arg is cast to tprocinfo).
+    Always returns fen_true. }
+  begin
+    case n.nodetype of
+      temprefn:
+        make_not_regable(n,[]);
+      calln:
+        include(tprocinfo(arg).flags,pi_do_call);
+      else
+        ;
+    end;
+    result:=fen_true;
+  end;
+
+function copy_parasize(var n: tnode; arg: pointer): foreachnoderesult; { Node
+    tree callback used via foreachnodestatic. For every call node the
+    pushed parameter size of that call is passed to allocate_push_parasize
+    of the procinfo given in arg (arg is cast to tcgprocinfo). All other
+    node types are ignored. Always returns fen_true. }
+  begin
+    case n.nodetype of
+      calln:
+        tcgprocinfo(arg).allocate_push_parasize(tcallnode(n).pushed_parasize);
+      else
+        ;
+    end;
+    result:=fen_true;
+  end;
+
+constructor taarch64tryfinallynode.create(l, r: TNode); { Creates an explicit
+    try..finally node from the try block l and the finally block r. On
+    aarch64-win64, and only if the current procedure is not generic, an
+    outlined '$fin$' procinfo of type potype_exceptfilter is created for r
+    and kept in finalizepi; otherwise finalizepi is left unassigned. }
+  begin
+    inherited create(l,r);
+    if (target_info.system=system_aarch64_win64) and
+      { Don't create child procedures for generic methods, their nested-like
+        behavior causes compilation errors because real nested procedures
+        aren't allowed for generics. Not creating them doesn't harm because
+        generic node tree is discarded without generating code. }
+       not (df_generic in current_procinfo.procdef.defoptions) then
+      begin
+        finalizepi:=tcgprocinfo(current_procinfo.create_for_outlining('$fin$',current_procinfo.procdef.struct,potype_exceptfilter,voidtype,r));
+        { the init/final code is messing with asm nodes, so inform the compiler about this }
+        include(finalizepi.flags,pi_has_assembler_block);
+        { Regvar optimization for symbols is suppressed when using exceptions, but
+          temps may be still placed into registers. This must be fixed. }
+        foreachnodestatic(r,@reset_regvars,finalizepi);
+      end;
+  end;
+
+constructor taarch64tryfinallynode.create_implicit(l, r: TNode); { Creates an
+    implicit try..finally node. On aarch64-win64 an outlined '$fin$'
+    procinfo of type potype_exceptfilter is created for r and kept in
+    finalizepi; it is flagged with pi_do_call and pi_has_assembler_block
+    and gets 32 bytes of pushed parameter space. Being called for a generic
+    procedure is treated as an internal error. }
+  begin
+    inherited create_implicit(l, r);
+    if (target_info.system=system_aarch64_win64) then
+      begin
+        if df_generic in current_procinfo.procdef.defoptions then
+          InternalError(2020033101);
+
+        finalizepi:=tcgprocinfo(current_procinfo.create_for_outlining('$fin$',current_procinfo.procdef.struct,potype_exceptfilter,voidtype,r));
+        include(finalizepi.flags,pi_do_call);
+        { the init/final code is messing with asm nodes, so inform the compiler about this }
+        include(finalizepi.flags,pi_has_assembler_block);
+        finalizepi.allocate_push_parasize(32); { NOTE(review): the reason for exactly 32 bytes is not visible here - confirm }
+      end;
+  end;
+
+function taarch64tryfinallynode.simplify(forinline: boolean): tnode; { Runs
+    the inherited simplification first. On aarch64-win64, if that did not
+    replace the node, a copy of the finally block is stored as the code of
+    the outlined finalizer procinfo (the inline finally block in right is
+    kept as well), and the pushed parameter sizes of the calls in right are
+    propagated to that procinfo. For implicit frames the finalizer procinfo
+    is additionally registered with the current procinfo.
+    NOTE(review): finalizepi is dereferenced unconditionally, but create
+    leaves it unassigned for generic procedures - confirm that this method
+    is never reached for those. }
+  begin
+    result:=inherited simplify(forinline);
+    if (target_info.system<>system_aarch64_win64) then
+      exit;
+    if (result=nil) then
+      begin
+        { generate a copy of the code }
+        finalizepi.code:=right.getcopy;
+        foreachnodestatic(right,@copy_parasize,finalizepi);
+        { For implicit frames, no actual code is available at this time,
+          it is added later in assembler form. So store the nested procinfo
+          for later use. }
+        if implicitframe then
+          begin
+            current_procinfo.finalize_procinfo:=finalizepi;
+          end;
+      end;
+  end;
+
+procedure emit_nop; { Appends a fresh jump label followed by a single NOP
+    instruction to the current assembler list. The reference count of the
+    label is increased so that the pair is not optimized away. }
+  var
+    dummy: TAsmLabel;
+  begin
+    { To avoid optimizing away the whole thing, prepend a jumplabel with increased refcount }
+    current_asmdata.getjumplabel(dummy);
+    dummy.increfs;
+    cg.a_label(current_asmdata.CurrAsmList,dummy);
+    current_asmdata.CurrAsmList.concat(Taicpu.op_none(A_NOP));
+  end;
+
+procedure taarch64tryfinallynode.pass_generate_code; { Generates the code of
+    a try..finally statement. Targets other than aarch64-win64 use the
+    inherited implementation. For aarch64-win64 the emitted sequence is: a
+    NOP-padded start label, the try block, the end-of-scope labels (with a
+    safecall exception handler in between when a catching frame is needed),
+    the finally block generated inline, and an end label. Finally a scope
+    record covering trylabel..endtrylabel is registered with the procinfo;
+    its handler is the outlined finalizer procedure held in finalizepi.
+    On a code generation error the routine exits early without restoring
+    flowcontrol or the exit label. }
+  var
+    trylabel,
+    endtrylabel,
+    finallylabel,
+    endfinallylabel,
+    templabel,
+    oldexitlabel: tasmlabel;
+    oldflowcontrol: tflowcontrol;
+    catch_frame: boolean;
+  begin
+    if (target_info.system<>system_aarch64_win64) then
+      begin
+        inherited pass_generate_code;
+        exit;
+      end;
+
+    location_reset(location,LOC_VOID,OS_NO);
+
+    { Do not generate a frame that catches exceptions if the only action
+      would be reraising it. Doing so is extremely inefficient with SEH
+      (in contrast with setjmp/longjmp exception handling) }
+    catch_frame:=implicitframe and
+      (current_procinfo.procdef.proccalloption=pocall_safecall);
+
+    oldflowcontrol:=flowcontrol;
+    flowcontrol:=[fc_inflowcontrol];
+
+    templabel:=nil;
+    current_asmdata.getjumplabel(trylabel);
+    current_asmdata.getjumplabel(endtrylabel);
+    current_asmdata.getjumplabel(finallylabel);
+    current_asmdata.getjumplabel(endfinallylabel);
+    oldexitlabel:=current_procinfo.CurrExitLabel;
+    if implicitframe then
+      current_procinfo.CurrExitLabel:=finallylabel;
+
+    { Start of scope }
+    { Padding with NOP is necessary here because exceptions in called
+      procedures are seen at the next instruction, while CPU/OS exceptions
+      like AV are seen at the current instruction.
+
+      So in the following code
+
+      raise_some_exception;        //(a)
+      try
+        pchar(nil)^:='0';          //(b)
+        ...
+
+      without NOP, exceptions (a) and (b) will be seen at the same address
+      and fall into the same scope. However they should be seen in different scopes.
+    }
+
+    emit_nop;
+    cg.a_label(current_asmdata.CurrAsmList,trylabel);
+
+    { try code }
+    if assigned(left) then
+      begin
+        { fc_unwind_xx tells exit/continue/break statements to emit special
+          unwind code instead of just JMP }
+        if not implicitframe then
+          flowcontrol:=flowcontrol+[fc_catching_exceptions,fc_unwind_exit,fc_unwind_loop];
+        secondpass(left);
+        flowcontrol:=flowcontrol-[fc_catching_exceptions,fc_unwind_exit,fc_unwind_loop];
+        if codegenerror then
+          exit;
+      end;
+
+    { finallylabel is only used in implicit frames as an exit point from nested try..finally
+      statements, if any. To prevent finalizer from being executed twice, it must come before
+      endtrylabel (bug #34772) }
+    if catch_frame then
+      begin
+        current_asmdata.getjumplabel(templabel);
+        cg.a_label(current_asmdata.CurrAsmList, finallylabel);
+        { jump over exception handler }
+        cg.a_jmp_always(current_asmdata.CurrAsmList,templabel);
+        { Handle the except block first, so endtrylabel serves both
+          as end of scope and as unwind target. This way it is possible to
+          encode everything into a single scope record. }
+        cg.a_label(current_asmdata.CurrAsmList,endtrylabel);
+        if (current_procinfo.procdef.proccalloption=pocall_safecall) then
+          begin
+            handle_safecall_exception;
+            cg.a_jmp_always(current_asmdata.CurrAsmList,endfinallylabel);
+          end
+        else
+          InternalError(2014031601);
+        cg.a_label(current_asmdata.CurrAsmList,templabel);
+      end
+    else
+      begin
+        { same as emit_nop but using finallylabel instead of dummy }
+        cg.a_label(current_asmdata.CurrAsmList,finallylabel);
+        finallylabel.increfs;
+        current_asmdata.CurrAsmList.concat(Taicpu.op_none(A_NOP));
+        cg.a_label(current_asmdata.CurrAsmList,endtrylabel);
+      end;
+
+    flowcontrol:=[fc_inflowcontrol];
+    { store the tempflags so that we can generate a copy of the finally handler
+      later on }
+    if not implicitframe then
+      finalizepi.store_tempflags;
+    { generate the inline finalizer code }
+    secondpass(right);
+
+    if codegenerror then
+      exit;
+
+    { normal exit from safecall proc must zero the result register }
+    if implicitframe and (current_procinfo.procdef.proccalloption=pocall_safecall) then
+      cg.a_load_const_reg(current_asmdata.CurrAsmList,OS_INT,0,NR_FUNCTION_RESULT_REG);
+
+    cg.a_label(current_asmdata.CurrAsmList,endfinallylabel);
+
+    { generate the scope record in .xdata; catch_frame is passed as the
+      "implicit" argument and so selects the kind of record that is added }
+    tcpuprocinfo(current_procinfo).add_finally_scope(trylabel,endtrylabel,
+      current_asmdata.RefAsmSymbol(finalizepi.procdef.mangledname,AT_FUNCTION),catch_frame);
+
+    if implicitframe then
+      current_procinfo.CurrExitLabel:=oldexitlabel;
+    flowcontrol:=oldflowcontrol;
+  end;
+
+{ taarch64tryexceptnode }
+
+procedure taarch64tryexceptnode.pass_generate_code;
+  { Generates the code for a try..except statement.
+
+    On aarch64-win64 the protected region is described by an SEH scope
+    record, registered at the end through add_except_scope; every other
+    target is handed to the inherited implementation.
+
+    Node fields used here:
+      left  - the try block
+      right - chain of "on" nodes, linked through their own left field
+      t1    - the else (default) handler }
+  var
+    trylabel,
+    exceptlabel,oldendexceptlabel,
+    lastonlabel,
+    exitexceptlabel,
+    continueexceptlabel,
+    breakexceptlabel,
+    oldCurrExitLabel,
+    oldContinueLabel,
+    oldBreakLabel : tasmlabel;
+    onlabel,
+    filterlabel: tasmlabel;
+    oldflowcontrol,tryflowcontrol,
+    exceptflowcontrol : tflowcontrol;
+    hnode : tnode;
+    hlist : tasmlist;
+    onnodecount : tai_const;
+    sym : tasmsymbol;
+  label
+    errorexit;
+  begin
+    { only Win64 uses the SEH based code below }
+    if (target_info.system<>system_aarch64_win64) then
+      begin
+        inherited pass_generate_code;
+        exit;
+      end;
+    location_reset(location,LOC_VOID,OS_NO);
+
+    oldflowcontrol:=flowcontrol;
+    exceptflowcontrol:=[];
+    continueexceptlabel:=nil;
+    breakexceptlabel:=nil;
+
+    include(flowcontrol,fc_inflowcontrol);
+    { this can be called recursively }
+    oldBreakLabel:=nil;
+    oldContinueLabel:=nil;
+    { endexceptlabel is not a local of this procedure, so the outer value is
+      saved here and restored at errorexit }
+    oldendexceptlabel:=endexceptlabel;
+
+    { save the old labels for control flow statements }
+    oldCurrExitLabel:=current_procinfo.CurrExitLabel;
+    current_asmdata.getjumplabel(exitexceptlabel);
+    { break/continue are only redirected if a break label is currently set
+      (presumably: if we are inside a loop - confirm) }
+    if assigned(current_procinfo.CurrBreakLabel) then
+      begin
+        oldContinueLabel:=current_procinfo.CurrContinueLabel;
+        oldBreakLabel:=current_procinfo.CurrBreakLabel;
+        current_asmdata.getjumplabel(breakexceptlabel);
+        current_asmdata.getjumplabel(continueexceptlabel);
+      end;
+
+    current_asmdata.getjumplabel(exceptlabel);
+    current_asmdata.getjumplabel(endexceptlabel);
+    current_asmdata.getjumplabel(lastonlabel);
+    { stays nil (and is passed as nil to add_except_scope) if there are no
+      "on" nodes }
+    filterlabel:=nil;
+
+    { start of scope }
+    current_asmdata.getjumplabel(trylabel);
+    { NOP padding in front of the scope start; the reason is explained at
+      the start of scope in taarch64tryfinallynode.pass_generate_code }
+    emit_nop;
+    cg.a_label(current_asmdata.CurrAsmList,trylabel);
+
+    { control flow in try block needs no special handling,
+      just make sure that target labels are outside the scope }
+    secondpass(left);
+    { remembered before the error check so that errorexit can use it }
+    tryflowcontrol:=flowcontrol;
+    if codegenerror then
+      goto errorexit;
+
+    { jump over except handlers }
+    cg.a_jmp_always(current_asmdata.CurrAsmList,endexceptlabel);
+
+    { end of scope }
+    cg.a_label(current_asmdata.CurrAsmList,exceptlabel);
+
+    { set control flow labels for the except block }
+    { and the on statements                        }
+    current_procinfo.CurrExitLabel:=exitexceptlabel;
+    if assigned(oldBreakLabel) then
+      begin
+        current_procinfo.CurrContinueLabel:=continueexceptlabel;
+        current_procinfo.CurrBreakLabel:=breakexceptlabel;
+      end;
+
+    flowcontrol:=[fc_inflowcontrol];
+    { on statements }
+    if assigned(right) then
+      begin
+        { emit filter table to a temporary asmlist }
+        hlist:=TAsmList.Create;
+        current_asmdata.getaddrlabel(filterlabel);
+        new_section(hlist,sec_rodata_norel,filterlabel.name,4);
+        cg.a_label(hlist,filterlabel);
+        { the table starts with a 32 bit entry count; it is emitted as 0 and
+          incremented in place for every entry appended below }
+        onnodecount:=tai_const.create_32bit(0);
+        hlist.concat(onnodecount);
+
+        hnode:=right;
+        while assigned(hnode) do
+          begin
+            if hnode.nodetype<>onn then
+              InternalError(2011103101);
+            current_asmdata.getjumplabel(onlabel);
+            { one table entry per "on" node: RVA of the VMT of the exception
+              type followed by the RVA of the label of its handler code }
+            sym:=current_asmdata.RefAsmSymbol(tonnode(hnode).excepttype.vmt_mangledname,AT_DATA,true);
+            hlist.concat(tai_const.create_rva_sym(sym));
+            hlist.concat(tai_const.create_rva_sym(onlabel));
+            current_module.add_extern_asmsym(sym);
+            { the handler code itself goes to the normal code list }
+            cg.a_label(current_asmdata.CurrAsmList,onlabel);
+            secondpass(hnode);
+            inc(onnodecount.value);
+            hnode:=tonnode(hnode).left;
+          end;
+        { add 'else' node to the filter list, too }
+        if assigned(t1) then
+          begin
+            { the else handler gets -1 in place of a VMT RVA and lastonlabel
+              as its handler label }
+            hlist.concat(tai_const.create_32bit(-1));
+            hlist.concat(tai_const.create_rva_sym(lastonlabel));
+            inc(onnodecount.value);
+          end;
+        { now move filter table to permanent list all at once }
+        current_procinfo.aktlocaldata.concatlist(hlist);
+        hlist.free;
+      end;
+
+    cg.a_label(current_asmdata.CurrAsmList,lastonlabel);
+    if assigned(t1) then
+      begin
+        { here we don't have to reset flowcontrol           }
+        { the default and on flowcontrols are handled equal }
+        secondpass(t1);
+        cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
+        { the jump is only needed if one of the exit/break/continue stubs
+          below gets emitted; otherwise nothing but the final NOP lies
+          between here and endexceptlabel }
+        if (flowcontrol*[fc_exit,fc_break,fc_continue]<>[]) then
+          cg.a_jmp_always(current_asmdata.CurrAsmList,endexceptlabel);
+      end;
+    exceptflowcontrol:=flowcontrol;
+
+    if fc_exit in exceptflowcontrol then
+      begin
+        { target of exit statements inside the except block (CurrExitLabel
+          was redirected to exitexceptlabel above): finish the exception,
+          then continue to the outer exit label }
+        cg.a_label(current_asmdata.CurrAsmList,exitexceptlabel);
+        cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
+        { fc_unwind_exit in the outer flow control requests unwind code
+          instead of a plain jump }
+        if (fc_unwind_exit in oldflowcontrol) then
+          cg.g_local_unwind(current_asmdata.CurrAsmList,oldCurrExitLabel)
+        else
+          cg.a_jmp_always(current_asmdata.CurrAsmList,oldCurrExitLabel);
+      end;
+
+    if fc_break in exceptflowcontrol then
+      begin
+        { same as above for break statements inside the except block }
+        cg.a_label(current_asmdata.CurrAsmList,breakexceptlabel);
+        cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
+        if (fc_unwind_loop in oldflowcontrol) then
+          cg.g_local_unwind(current_asmdata.CurrAsmList,oldBreakLabel)
+        else
+          cg.a_jmp_always(current_asmdata.CurrAsmList,oldBreakLabel);
+      end;
+
+    if fc_continue in exceptflowcontrol then
+      begin
+        { same as above for continue statements inside the except block }
+        cg.a_label(current_asmdata.CurrAsmList,continueexceptlabel);
+        cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
+        if (fc_unwind_loop in oldflowcontrol) then
+          cg.g_local_unwind(current_asmdata.CurrAsmList,oldContinueLabel)
+        else
+          cg.a_jmp_always(current_asmdata.CurrAsmList,oldContinueLabel);
+      end;
+
+    emit_nop;
+    cg.a_label(current_asmdata.CurrAsmList,endexceptlabel);
+    { register the scope: start of the try block, end of the try block,
+      label behind the handlers and the filter table (nil without "on"
+      nodes) }
+    tcpuprocinfo(current_procinfo).add_except_scope(trylabel,exceptlabel,endexceptlabel,filterlabel);
+
+errorexit:
+    { restore all saved labels }
+    endexceptlabel:=oldendexceptlabel;
+
+    { restore the control flow labels }
+    current_procinfo.CurrExitLabel:=oldCurrExitLabel;
+    if assigned(oldBreakLabel) then
+      begin
+        current_procinfo.CurrContinueLabel:=oldContinueLabel;
+        current_procinfo.CurrBreakLabel:=oldBreakLabel;
+      end;
+
+    { return all used control flow statements }
+    flowcontrol:=oldflowcontrol+(exceptflowcontrol +
+      tryflowcontrol - [fc_inflowcontrol]);
+  end;
+
+initialization
+  { point the node class references for raise, on, try..except and
+    try..finally at the aarch64 specific classes of this unit
+    (presumably these references are used whenever such nodes are
+    created - confirm against the generic node units) }
+  craisenode:=taarch64raisenode;
+  connode:=taarch64onnode;
+  ctryexceptnode:=taarch64tryexceptnode;
+  ctryfinallynode:=taarch64tryfinallynode;
+end.
+

+ 226 - 1
compiler/aarch64/racpugas.pas

@@ -28,14 +28,21 @@ Unit racpugas;
 
     uses
       raatt,racpu,
+      aasmtai,
       cpubase;
 
     type
+
+      { taarch64attreader }
+
       taarch64attreader = class(tattreader)
         actoppostfix : TOpPostfix;
+        actsehdirective : TAsmSehDirective;
         function is_asmopcode(const s: string):boolean;override;
         function is_register(const s:string):boolean;override;
+        function is_targetdirective(const s: string): boolean;override;
         procedure handleopcode;override;
+        procedure handletargetdirective; override;
         procedure BuildReference(oper: taarch64operand; is64bit: boolean);
         procedure BuildOperand(oper: taarch64operand; is64bit: boolean);
         function TryBuildShifterOp(instr: taarch64instruction; opnr: longint) : boolean;
@@ -53,7 +60,7 @@ Unit racpugas;
       cutils,
       { global }
       globtype,verbose,
-      systems,aasmbase,aasmtai,aasmdata,aasmcpu,
+      systems,aasmbase,aasmdata,aasmcpu,
       { symtable }
       symconst,symsym,symdef,
       procinfo,
@@ -98,6 +105,46 @@ Unit racpugas;
       end;
 
 
+    const
+      { Aarch64 subset of SEH directives. .seh_proc, .seh_endproc and .seh_endepilogue
+        excluded because they are generated automatically when needed.
+
+        is_targetdirective only accepts directives contained in this set.
+
+        NOTE(review): ash_addfp is listed here, but handletargetdirective has
+        no case branch for it, so it ends up in the InternalError of the else
+        branch there - confirm whether it should be handled or removed. }
+      recognized_directives: set of TAsmSehDirective=[
+        ash_endprologue,ash_handler,ash_handlerdata,
+        ash_stackalloc,ash_nop,ash_savefplr,ash_savefplr_x,
+        ash_savereg,ash_savereg_x,ash_saveregp,ash_saveregp_x,
+        ash_savefreg,ash_savefreg_x,ash_savefregp,ash_savefregp_x,
+        ash_setfp,ash_addfp
+      ];
+
+
+    function taarch64attreader.is_targetdirective(const s: string): boolean;
+      { Checks whether s is one of the SEH directives accepted on
+        aarch64-win64. On a match the directive is stored in actsehdirective.
+        A match outside of a pure assembler routine is reported as an error
+        and rejected. }
+      var
+        directive: TAsmSehDirective;
+        matched: boolean;
+      begin
+        result:=false;
+        { SEH directives exist only for the Win64 target }
+        if target_info.system=system_aarch64_win64 then
+          begin
+            matched:=false;
+            for directive:=low(TAsmSehDirective) to high(TAsmSehDirective) do
+              if (directive in recognized_directives) and
+                 (s=sehdirectivestr[directive]) then
+                begin
+                  actsehdirective:=directive;
+                  matched:=true;
+                  break;
+                end;
+
+            if matched then
+              begin
+                { allow SEH directives only in pure assembler routines }
+                if po_assembler in current_procinfo.procdef.procoptions then
+                  result:=true
+                else
+                  Message(asmr_e_seh_in_pure_asm_only);
+              end;
+          end;
+      end;
+
+
     procedure taarch64attreader.ReadSym(oper: taarch64operand; is64bit: boolean);
       var
          tempstr, mangledname : string;
@@ -1035,6 +1082,184 @@ Unit racpugas;
       end;
 
 
+    procedure taarch64attreader.handletargetdirective;
+      { Parses the operands of the SEH directive stored in actsehdirective
+        (set by is_targetdirective), validates them and appends the
+        corresponding cai_seh_directive to curlist. The current procedure is
+        flagged with pi_has_unwind_info.
+
+        NOTE(review): ash_addfp is part of recognized_directives and has a
+        limit in maxoffset, but the case statement below has no branch for
+        it, so it runs into InternalError(2020033103) - confirm. }
+
+      { Returns the limit of the offset operand of ash: an upper bound for
+        the directives taking a non-negative offset, a (negative) lower bound
+        for the _x variants, whose operand has to be given as a non-positive
+        value (see add_reg_with_offset). }
+      function maxoffset(ash:TAsmSehDirective):aint;
+        begin
+          case ash of
+            ash_savefplr,
+            ash_saveregp,
+            ash_savereg,
+            ash_savefregp,
+            ash_savefreg:
+              result:=504;
+            ash_savefplr_x,
+            ash_saveregp_x,
+            ash_savefregp_x:
+              result:=-512;
+            ash_savereg_x,
+            ash_savefreg_x:
+              result:=-256;
+            ash_addfp:
+              result:=2040;
+            else
+              internalerror(2020041204);
+          end;
+        end;
+
+      { Validates hnum and appends the directive, with hreg if it is not
+        NR_NO. With neg set hnum has to lie in maxoffset(ash)..0, otherwise
+        in 0..maxoffset(ash); in both cases it has to be a multiple of 8.
+        The directive always stores the absolute value.
+        NOTE(review): the range check uses the ash parameter, while the error
+        message and the created directive use actsehdirective; all callers
+        pass actsehdirective, so both are the same value. }
+      procedure add_reg_with_offset(ash:TAsmSehDirective;hreg:tregister;hnum:aint;neg:boolean);
+        begin
+          if (neg and ((hnum>0) or (hnum<maxoffset(ash)) or (((-hnum) and $7)<>0))) or
+              (not neg and ((hnum<0) or (hnum>maxoffset(ash)) or ((hnum and $7)<>0))) then
+            Message1(asmr_e_bad_seh_directive_offset,sehdirectivestr[actsehdirective])
+          else
+            begin
+              if neg then
+                hnum:=-hnum;
+              if hreg=NR_NO then
+                curlist.concat(cai_seh_directive.create_offset(actsehdirective,hnum))
+              else
+                curlist.concat(cai_seh_directive.create_reg_offset(actsehdirective,hreg,hnum));
+            end;
+        end;
+
+      var
+        hreg,
+        hreg2 : TRegister;
+        hnum : aint;
+        flags : integer;
+        ai : tai_seh_directive;
+        hs : string;
+        err : boolean;
+      begin
+        if actasmtoken<>AS_TARGET_DIRECTIVE then
+          InternalError(2020033102);
+        Consume(AS_TARGET_DIRECTIVE);
+        Include(current_procinfo.flags,pi_has_unwind_info);
+
+        case actsehdirective of
+          { directives without operands }
+          ash_nop,
+          ash_setfp,
+          ash_endprologue,
+          ash_handlerdata:
+            curlist.concat(cai_seh_directive.create(actsehdirective));
+
+          { handler name, optionally followed by ",@except" and/or
+            ",@unwind" (compared case insensitively) }
+          ash_handler:
+            begin
+              hs:=actasmpattern;
+              Consume(AS_ID);
+              flags:=0;
+              err:=false;
+              while actasmtoken=AS_COMMA do
+                begin
+                  Consume(AS_COMMA);
+                  if actasmtoken=AS_AT then
+                    begin
+                      Consume(AS_AT);
+                      if actasmtoken=AS_ID then
+                        begin
+                          uppervar(actasmpattern);
+                          { @except sets bit 0, @unwind sets bit 1 }
+                          if actasmpattern='EXCEPT' then
+                            flags:=flags or 1
+                          else if actasmpattern='UNWIND' then
+                            flags:=flags or 2
+                          else
+                            err:=true;
+                          Consume(AS_ID);
+                        end
+                      else
+                        err:=true;
+                    end
+                  else
+                    err:=true;
+                  { on a malformed flag give up on the whole directive; no
+                    directive is appended in this case }
+                  if err then
+                    begin
+                      Message(asmr_e_syntax_error);
+                      RecoverConsume(false);
+                      exit;
+                    end;
+                end;
+
+              ai:=cai_seh_directive.create_name(ash_handler,hs);
+              ai.data.flags:=flags;
+              curlist.concat(ai);
+            end;
+          { offset only, no register operand }
+          ash_savefplr,
+          ash_savefplr_x:
+            begin
+              hnum:=BuildConstExpression(false,false);
+              add_reg_with_offset(actsehdirective,NR_NO,hnum,actsehdirective=ash_savefplr_x);
+            end;
+          { integer register, offset; the register has to be a whole
+            integer register with a super register number of at least 19.
+            A bad register is reported, but parsing continues. }
+          ash_savereg,
+          ash_savereg_x:
+            begin
+              hreg:=actasmregister;
+              Consume(AS_REGISTER);
+              if (getregtype(hreg)<>R_INTREGISTER) or (getsubreg(hreg)<>R_SUBWHOLE) or (getsupreg(hreg)<19) then
+                Message1(asmr_e_bad_seh_directive_register,sehdirectivestr[actsehdirective]);
+              Consume(AS_COMMA);
+              hnum:=BuildConstExpression(false,false);
+              add_reg_with_offset(actsehdirective,hreg,hnum,actsehdirective=ash_savereg_x);
+            end;
+          { integer register pair, offset; the second register has to be
+            the direct successor of the first one. Only the first register
+            is stored in the directive. }
+          ash_saveregp,
+          ash_saveregp_x:
+            begin
+              hreg:=actasmregister;
+              consume(AS_REGISTER);
+              if (getregtype(hreg)<>R_INTREGISTER) or (getsubreg(hreg)<>R_SUBWHOLE) or (getsupreg(hreg)<19) then
+                Message1(asmr_e_bad_seh_directive_register,sehdirectivestr[actsehdirective]);
+              consume(AS_COMMA);
+              hreg2:=actasmregister;
+              consume(AS_REGISTER);
+              if (getregtype(hreg2)<>R_INTREGISTER) or (getsubreg(hreg2)<>R_SUBWHOLE) or (getsupreg(hreg2)<>getsupreg(hreg)+1) then
+                Message1(asmr_e_bad_seh_directive_register,sehdirectivestr[actsehdirective]);
+              consume(AS_COMMA);
+              hnum:=BuildConstExpression(false,false);
+              add_reg_with_offset(actsehdirective,hreg,hnum,actsehdirective=ash_saveregp_x);
+            end;
+          { same as ash_savereg, but for a whole MM register with a super
+            register number of at least 8 }
+          ash_savefreg,
+          ash_savefreg_x:
+            begin
+              hreg:=actasmregister;
+              Consume(AS_REGISTER);
+              if (getregtype(hreg)<>R_MMREGISTER) or (getsubreg(hreg)<>R_SUBWHOLE) or (getsupreg(hreg)<8) then
+                Message1(asmr_e_bad_seh_directive_register,sehdirectivestr[actsehdirective]);
+              Consume(AS_COMMA);
+              hnum:=BuildConstExpression(false,false);
+              add_reg_with_offset(actsehdirective,hreg,hnum,actsehdirective=ash_savefreg_x);
+            end;
+          { same as ash_saveregp, but for a pair of consecutive MM
+            registers }
+          ash_savefregp,
+          ash_savefregp_x:
+            begin
+              hreg:=actasmregister;
+              consume(AS_REGISTER);
+              if (getregtype(hreg)<>R_MMREGISTER) or (getsubreg(hreg)<>R_SUBWHOLE) or (getsupreg(hreg)<8) then
+                Message1(asmr_e_bad_seh_directive_register,sehdirectivestr[actsehdirective]);
+              consume(AS_COMMA);
+              hreg2:=actasmregister;
+              consume(AS_REGISTER);
+              if (getregtype(hreg2)<>R_MMREGISTER) or (getsubreg(hreg2)<>R_SUBWHOLE) or (getsupreg(hreg2)<>getsupreg(hreg)+1) then
+                Message1(asmr_e_bad_seh_directive_register,sehdirectivestr[actsehdirective]);
+              consume(AS_COMMA);
+              hnum:=BuildConstExpression(false,false);
+              add_reg_with_offset(actsehdirective,hreg,hnum,actsehdirective=ash_savefregp_x);
+            end;
+          { size in the range 0..$FFFFFF, has to be a multiple of 8 }
+          ash_stackalloc:
+            begin
+              hnum:=BuildConstExpression(false,false);
+              if (hnum<0) or (hnum>$FFFFFF) or ((hnum and 7)<>0) then
+                Message1(asmr_e_bad_seh_directive_offset,sehdirectivestr[ash_stackalloc])
+              else
+                curlist.concat(cai_seh_directive.create_offset(ash_stackalloc,hnum));
+            end;
+          else
+            InternalError(2020033103);
+        end;
+        { the directive has to be followed by a separator; Consume is only
+          called on a mismatch (presumably to report the unexpected token -
+          confirm) }
+        if actasmtoken<>AS_SEPARATOR then
+          Consume(AS_SEPARATOR);
+      end;
+
+
 {*****************************************************************************
                                      Initialize
 *****************************************************************************}

+ 20 - 2
compiler/aasmtai.pas

@@ -398,7 +398,10 @@ interface
           ash_endprologue,ash_handler,ash_handlerdata,
           ash_eh,ash_32,ash_no32,
           ash_setframe,ash_stackalloc,ash_pushreg,
-          ash_savereg,ash_savexmm,ash_pushframe,
+          ash_savereg,ash_savereg_x,ash_saveregp,ash_saveregp_x,
+          ash_savexmm,ash_savefreg,ash_savefreg_x,ash_savefregp,ash_savefregp_x,ash_pushframe,
+          ash_setfp,ash_addfp,ash_savefplr,ash_savefplr_x,
+          ash_nop,
           ash_pushnv,ash_savenv
         );
 
@@ -439,7 +442,10 @@ interface
         '.seh_endprologue','.seh_handler','.seh_handlerdata',
         '.seh_eh','.seh_32','seh_no32',
         '.seh_setframe','.seh_stackalloc','.seh_pushreg',
-        '.seh_savereg','.seh_savexmm','.seh_pushframe',
+        '.seh_savereg','.seh_savereg_x','.seh_saveregp','.seh_saveregp_x',
+        '.seh_savexmm','.seh_savefreg','.seh_savefreg_x','.seh_savefregp','.seh_savefregp_x','.seh_pushframe',
+        '.seh_setfp','.seh_addfp','.seh_savefplr','.seh_savefplr_x',
+        '.seh_nop',
         '.pushnv','.savenv'
       );
       symbolpairkindstr: array[TSymbolPairKind] of string[11]=(
@@ -3334,8 +3340,20 @@ implementation
         sd_offset,     { stackalloc }
         sd_reg,        { pushreg }
         sd_regoffset,  { savereg }
+        sd_regoffset,  { savereg_x }
+        sd_regoffset,  { saveregp }
+        sd_regoffset,  { saveregp_x }
         sd_regoffset,  { savexmm }
+        sd_regoffset,  { savefreg }
+        sd_regoffset,  { savefreg_x }
+        sd_regoffset,  { savefregp }
+        sd_regoffset,  { savefregp_x }
         sd_none,       { pushframe }
+        sd_none,       { setfp }
+        sd_none,       { addfp }
+        sd_offset,     { savefplr }
+        sd_offset,     { savefplr_x }
+        sd_none,       { nop }
         sd_reg,        { pushnv }
         sd_none        { savenv }
       );

+ 5 - 2
compiler/ogcoff.pas

@@ -2959,7 +2959,7 @@ const pemagic : array[0..3] of byte = (
         objreloc:TObjRelocation;
         i,j:longint;
       begin
-        if target_info.system<>system_x86_64_win64 then
+        if not (target_info.system in [system_x86_64_win64,system_aarch64_win64]) then
           exit;
         exesec:=FindExeSection('.pdata');
         if exesec=nil then
@@ -2989,7 +2989,10 @@ const pemagic : array[0..3] of byte = (
                         .eh_frame sections. }
                     break;
                   end;
-                inc(j,3);
+                if target_info.system=system_aarch64_win64 then
+                  inc(j,2)
+                else
+                  inc(j,3);
               end;
           end;
       end;

+ 6 - 1
compiler/psub.pas

@@ -1973,7 +1973,12 @@ implementation
             cg.set_regalloc_live_range_direction(rad_forward);
 
             if assigned(finalize_procinfo) then
-              generate_exceptfilter(tcgprocinfo(finalize_procinfo))
+              begin
+                if target_info.system in [system_aarch64_win64] then
+                  tcgprocinfo(finalize_procinfo).store_tempflags
+                else
+                  generate_exceptfilter(tcgprocinfo(finalize_procinfo));
+              end
             else if not temps_finalized then
               begin
                 hlcg.gen_finalize_code(templist);

+ 2 - 0
compiler/x86_64/win64unw.pas

@@ -397,6 +397,8 @@ begin
     ash_pushnv,
     ash_savenv:
       internalerror(2019050712);
+    else
+      internalerror(2020041901);
   end;
 end;