瀏覽代碼

+ Xtensa: initial skeleton completed
+ xtensa-embedded support in the compiler

git-svn-id: trunk@44318 -

florian 5 年之前
父節點
當前提交
9e40d536cb

+ 19 - 0
.gitattributes

@@ -1011,6 +1011,22 @@ compiler/x86_64/x8664nop.inc svneol=native#text/plain
 compiler/x86_64/x8664op.inc svneol=native#text/plain
 compiler/x86_64/x8664pro.inc svneol=native#text/plain
 compiler/x86_64/x8664tab.inc svneol=native#text/plain
+compiler/xtensa/aasmcpu.pas svneol=native#text/pascal
+compiler/xtensa/agcpugas.pas svneol=native#text/pascal
+compiler/xtensa/aoptcpu.pas svneol=native#text/pascal
+compiler/xtensa/aoptcpub.pas svneol=native#text/pascal
+compiler/xtensa/cgcpu.pas svneol=native#text/pascal
+compiler/xtensa/cpubase.pas svneol=native#text/pascal
+compiler/xtensa/cpuinfo.pas svneol=native#text/pascal
+compiler/xtensa/cpunode.pas svneol=native#text/pascal
+compiler/xtensa/cpupara.pas svneol=native#text/pascal
+compiler/xtensa/cpupi.pas svneol=native#text/pascal
+compiler/xtensa/cputarg.pas svneol=native#text/pascal
+compiler/xtensa/hlcgcpu.pas svneol=native#text/pascal
+compiler/xtensa/itcpugas.pas svneol=native#text/pascal
+compiler/xtensa/racpugas.pas svneol=native#text/pascal
+compiler/xtensa/raxtensa.pas svneol=native#text/pascal
+compiler/xtensa/rgcpu.pas svneol=native#text/pascal
 compiler/xtensa/rxtensacon.inc svneol=native#text/plain
 compiler/xtensa/rxtensadwa.inc svneol=native#text/plain
 compiler/xtensa/rxtensanor.inc svneol=native#text/plain
@@ -1020,6 +1036,9 @@ compiler/xtensa/rxtensasri.inc svneol=native#text/plain
 compiler/xtensa/rxtensasta.inc svneol=native#text/plain
 compiler/xtensa/rxtensastd.inc svneol=native#text/plain
 compiler/xtensa/rxtensasup.inc svneol=native#text/plain
+compiler/xtensa/symcpu.pas svneol=native#text/pascal
+compiler/xtensa/xtensaatt.inc svneol=native#text/plain
+compiler/xtensa/xtensaop.inc svneol=native#text/plain
 compiler/xtensa/xtensareg.dat svneol=native#text/plain
 /fpmake.pp svneol=native#text/plain
 /fpmake_add1.inc svneol=native#text/plain

+ 12 - 0
compiler/fpcdefs.inc

@@ -308,6 +308,18 @@
   {$define cpurequiresproperalignment}
 {$endif riscv64}
 
+{$ifdef xtensa}
+  {$define cpu32bit}
+  {$define cpu32bitaddr}
+  {$define cpu32bitalu}
+  {$define cpufpemu}
+  {$define cputargethasfixedstack}
+  {$define cpuneedsmulhelper}
+  {$define cpuneedsdivhelper}
+  {$define cpucapabilities}
+  {$define cpurequiresproperalignment}
+{$endif xtensa}
+
 { Stabs is not officially supported on 64 bit targets by gdb, except on Mac OS X
   (but there we don't support it)
 }

+ 6 - 0
compiler/globals.pas

@@ -549,6 +549,12 @@ interface
         asmcputype : cpu_none;
         fputype : fpu_fd;
   {$endif riscv64}
+  {$ifdef xtensa}
+        cputype : cpu_none;
+        optimizecputype : cpu_none;
+        asmcputype : cpu_none;
+        fputype : fpu_none;
+  {$endif xtensa}
 {$endif not GENERIC_CPU}
         asmmode : asmmode_standard;
 {$ifndef jvm}

+ 1 - 1
compiler/options.pas

@@ -4213,7 +4213,7 @@ begin
      ((target_info.system in [system_arm_wince,system_arm_gba,
          system_m68k_amiga,system_m68k_atari,
          system_arm_nds,system_arm_embedded,
-         system_riscv32_embedded,system_riscv64_embedded])
+         system_riscv32_embedded,system_riscv64_embedded,system_xtensa_embedded])
 {$ifdef arm}
       or (target_info.abi=abi_eabi)
 {$endif arm}

+ 6 - 0
compiler/pp.pas

@@ -178,6 +178,12 @@ program pp;
   {$endif CPUDEFINED}
   {$define CPUDEFINED}
 {$endif RISCV64}
+{$ifdef XTENSA}
+  {$ifdef CPUDEFINED}
+    {$fatal ONLY one of the switches for the CPU type must be defined}
+  {$endif CPUDEFINED}
+  {$define CPUDEFINED}
+{$endif XTENSA}
 
 {$ifndef CPUDEFINED}
   {$fatal A CPU type switch must be defined}

+ 4 - 2
compiler/systems.inc

@@ -54,7 +54,8 @@
              cpu_wasm,                     { 17 }
              cpu_sparc64,                  { 18 }
              cpu_riscv32,                  { 19 }
-             cpu_riscv64                   { 20 }
+             cpu_riscv64,                  { 20 }
+             cpu_xtensa                    { 21 }
        );
 
        tasmmode= (asmmode_none
@@ -188,7 +189,8 @@
              system_riscv32_embedded,   { 99 }
              system_aarch64_android,    { 100 }
              system_x86_64_android,     { 101 }
-             system_x86_64_haiku        { 102 }
+             system_x86_64_haiku,       { 102 }
+             system_xtensa_embedded     { 103 }
        );
 
      type

+ 1 - 1
compiler/systems.pas

@@ -424,7 +424,7 @@ interface
        cpu2str : array[TSystemCpu] of string[10] =
             ('','i386','m68k','alpha','powerpc','sparc','vm','ia64','x86_64',
              'mips','arm', 'powerpc64', 'avr', 'mipsel','jvm', 'i8086',
-             'aarch64', 'wasm', 'sparc64','riscv32','riscv64');
+             'aarch64', 'wasm', 'sparc64', 'riscv32', 'riscv64', 'xtensa');
 
        abiinfo : array[tabi] of tabiinfo = (
          (name: 'DEFAULT'; supported: true),

+ 74 - 1
compiler/systems/i_embed.pas

@@ -649,7 +649,74 @@ unit i_embed;
             llvmdatalayout : 'e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S32';
           );
 
-  implementation
+       system_xtensa_embedded_info : tsysteminfo =
+          (
+            system       : system_xtensa_embedded;
+            name         : 'Embedded';
+            shortname    : 'embedded';
+            flags        : [tf_needs_symbol_size,tf_files_case_sensitive,tf_requires_proper_alignment,
+                            tf_smartlink_sections,tf_init_final_units_by_calls];
+            cpu          : cpu_xtensa;
+            unit_env     : '';
+            extradefines : '';
+            exeext       : '';
+            defext       : '.def';
+            scriptext    : '.sh';
+            smartext     : '.sl';
+            unitext      : '.ppu';
+            unitlibext   : '.ppl';
+            asmext       : '.s';
+            objext       : '.o';
+            resext       : '.res';
+            resobjext    : '.or';
+            sharedlibext : '.so';
+            staticlibext : '.a';
+            staticlibprefix : 'libp';
+            sharedlibprefix : 'lib';
+            sharedClibext : '.so';
+            staticClibext : '.a';
+            staticClibprefix : 'lib';
+            sharedClibprefix : 'lib';
+            importlibprefix : 'libimp';
+            importlibext : '.a';
+            Cprefix      : '';
+            newline      : #10;
+            dirsep       : '/';
+            assem        : as_gas;
+            assemextern  : as_gas;
+            link         : ld_none;
+            linkextern   : ld_embedded;
+            ar           : ar_gnu_ar;
+            res          : res_none;
+            dbg          : dbg_dwarf2;
+            script       : script_unix;
+            endian       : endian_little;
+            alignment    :
+              (
+                procalign       : 4;
+                loopalign       : 4;
+                jumpalign       : 0;
+                jumpalignskipmax    : 0;
+                coalescealign   : 0;
+                coalescealignskipmax: 0;
+                constalignmin   : 0;
+                constalignmax   : 4;
+                varalignmin     : 0;
+                varalignmax     : 4;
+                localalignmin   : 4;
+                localalignmax   : 4;
+                recordalignmin  : 0;
+                recordalignmax  : 4;
+                maxCrecordalign : 4
+              );
+            first_parm_offset : 8;
+            stacksize    : 262144;
+            stackalign   : 4;
+            abi : abi_default;
+            llvmdatalayout : 'e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S32';
+          );
+
+ implementation
 
 initialization
 {$ifdef CPUARM}
@@ -697,4 +764,10 @@ initialization
     set_source_info(system_riscv64_embedded_info);
   {$endif embedded}
 {$endif cpuriscv64}
+{$ifdef cpuxtensa}
+  {$ifdef embedded}
+    set_source_info(system_xtensa_embedded_info);
+  {$endif embedded}
+{$endif cpuxtensa}
 end.
+

+ 6 - 1
compiler/systems/t_embed.pas

@@ -1423,7 +1423,7 @@ begin
   if success and not(cs_link_nolink in current_settings.globalswitches) then
     success:=PostProcessExecutable(FixedExeFileName,false);
 
-  if success and (target_info.system in [system_arm_embedded,system_avr_embedded,system_mipsel_embedded]) then
+  if success and (target_info.system in [system_arm_embedded,system_avr_embedded,system_mipsel_embedded,system_xtensa_embedded]) then
     begin
       success:=DoExec(FindUtil(utilsprefix+'objcopy'),'-O ihex '+
         FixedExeFileName+' '+
@@ -1647,4 +1647,9 @@ initialization
   RegisterLinker(ld_embedded,TLinkerEmbedded);
   RegisterTarget(system_riscv64_embedded_info);
 {$endif riscv64}
+
+{$ifdef xtensa}
+  RegisterLinker(ld_embedded,TLinkerEmbedded);
+  RegisterTarget(system_xtensa_embedded_info);
+{$endif xtensa}
 end.

+ 468 - 0
compiler/xtensa/aasmcpu.pas

@@ -0,0 +1,468 @@
+{
+    Copyright (c) 1999-2002 by Jonas Maebe and Thomas Schatzl
+
+    Contains the assembler object for the Risc-V 32 and Risc-V 64
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit aasmcpu;
+
+{$i fpcdefs.inc}
+
+interface
+
+uses
+  globtype,verbose,
+  aasmbase,aasmtai,aasmdata,aasmsym,
+  cpubase,cgbase,cgutils;
+
+    const
+      { "mov reg,reg" source operand number }
+      O_MOV_SOURCE = 1;
+      { "mov reg,reg" source operand number }
+      O_MOV_DEST = 0;
+
+
+    type
+      taicpu = class(tai_cpu_abstract_sym)
+        constructor op_none(op : tasmop);
+
+        constructor op_reg(op : tasmop;_op1 : tregister);
+        constructor op_const(op : tasmop;_op1 : aint);
+
+        constructor op_reg_reg(op : tasmop;_op1,_op2 : tregister);
+        constructor op_reg_ref(op : tasmop;_op1 : tregister;const _op2 : treference);
+        constructor op_reg_const(op:tasmop; _op1: tregister; _op2: aint);
+        constructor op_const_reg(op:tasmop; _op1: aint; _op2: tregister);
+
+        constructor op_const_const(op : tasmop;_op1,_op2 : aint);
+
+        constructor op_reg_reg_const_const(op: tasmop; _op1, _op2: tregister; _op3, _op4: aint);
+
+        constructor op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
+        constructor op_reg_reg_const(op : tasmop;_op1,_op2 : tregister; _op3: aint);
+        constructor op_reg_reg_sym_ofs(op : tasmop;_op1,_op2 : tregister; _op3: tasmsymbol;_op3ofs: aint);
+        constructor op_reg_reg_ref(op : tasmop;_op1,_op2 : tregister; const _op3: treference);
+        constructor op_const_reg_reg(op : tasmop;_op1 : aint;_op2, _op3 : tregister);
+        constructor op_const_reg_const(op : tasmop;_op1 : aint;_op2 : tregister;_op3 : aint);
+        constructor op_const_const_const(op : tasmop;_op1 : aint;_op2 : aint;_op3 : aint);
+
+        constructor op_reg_reg_reg_reg(op : tasmop;_op1,_op2,_op3,_op4 : tregister);
+        constructor op_reg_bool_reg_reg(op : tasmop;_op1: tregister;_op2:boolean;_op3,_op4:tregister);
+        constructor op_reg_bool_reg_const(op : tasmop;_op1: tregister;_op2:boolean;_op3:tregister;_op4: aint);
+
+        constructor op_reg_reg_reg_const(op : tasmop; _op1, _op2, _op3 : tregister; _op4 : aint);
+        constructor op_reg_reg_reg_const_const(op : tasmop;_op1,_op2,_op3 : tregister;_op4,_op5 : aint);
+        constructor op_reg_reg_const_const_const(op : tasmop;_op1,_op2 : tregister;_op3,_op4,_op5 : aint);
+
+
+        { this is for Jmp instructions }
+        constructor op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
+        constructor op_const_const_sym(op : tasmop;_op1,_op2 : aint;_op3: tasmsymbol);
+
+
+        constructor op_sym(op : tasmop;_op1 : tasmsymbol);
+        constructor op_sym_ofs(op : tasmop;_op1 : tasmsymbol;_op1ofs:aint);
+        constructor op_reg_sym(op : tasmop;_op1 : tregister;_op2:tasmsymbol);
+        constructor op_reg_sym_ofs(op : tasmop;_op1 : tregister;_op2:tasmsymbol;_op2ofs : aint);
+        constructor op_sym_ofs_ref(op : tasmop;_op1 : tasmsymbol;_op1ofs:aint;const _op2 : treference);
+
+        procedure loadbool(opidx:aint;_b:boolean);
+
+        function is_same_reg_move(regtype: Tregistertype):boolean; override;
+
+        { register spilling code }
+        function spilling_get_operation_type(opnr: longint): topertype;override;
+        function spilling_get_operation_type_ref(opnr: longint; reg: tregister): topertype;override;
+      end;
+
+      tai_align = class(tai_align_abstract)
+        { nothing to add }
+      end;
+
+    procedure InitAsm;
+    procedure DoneAsm;
+
+
+    function spilling_create_load(const ref:treference;r:tregister):Taicpu;
+    function spilling_create_store(r:tregister; const ref:treference):Taicpu;
+
+implementation
+
+uses cutils, cclasses;
+
+{*****************************************************************************
+                                 taicpu Constructors
+*****************************************************************************}
+
+    procedure taicpu.loadbool(opidx:aint;_b:boolean);
+      begin
+        if opidx>=ops then
+         ops:=opidx+1;
+        with oper[opidx]^ do
+         begin
+           if typ=top_ref then
+            dispose(ref);
+           b:=_b;
+           typ:=top_bool;
+         end;
+      end;
+
+
+    constructor taicpu.op_none(op : tasmop);
+      begin
+         inherited create(op);
+      end;
+
+
+    constructor taicpu.op_reg(op : tasmop;_op1 : tregister);
+      begin
+         inherited create(op);
+         ops:=1;
+         loadreg(0,_op1);
+      end;
+
+
+    constructor taicpu.op_const(op : tasmop;_op1 : aint);
+      begin
+         inherited create(op);
+         ops:=1;
+         loadconst(0,_op1);
+      end;
+
+
+    constructor taicpu.op_reg_reg(op : tasmop;_op1,_op2 : tregister);
+      begin
+         inherited create(op);
+         ops:=2;
+         loadreg(0,_op1);
+         loadreg(1,_op2);
+      end;
+
+    constructor taicpu.op_reg_const(op:tasmop; _op1: tregister; _op2: aint);
+      begin
+         inherited create(op);
+         ops:=2;
+         loadreg(0,_op1);
+         loadconst(1,_op2);
+      end;
+
+     constructor taicpu.op_const_reg(op:tasmop; _op1: aint; _op2: tregister);
+      begin
+         inherited create(op);
+         ops:=2;
+         loadconst(0,_op1);
+         loadreg(1,_op2);
+      end;
+
+
+    constructor taicpu.op_reg_ref(op : tasmop;_op1 : tregister;const _op2 : treference);
+      begin
+         inherited create(op);
+         ops:=2;
+         loadreg(0,_op1);
+         loadref(1,_op2);
+      end;
+
+
+    constructor taicpu.op_const_const(op : tasmop;_op1,_op2 : aint);
+      begin
+         inherited create(op);
+         ops:=2;
+         loadconst(0,_op1);
+         loadconst(1,_op2);
+      end;
+
+
+    constructor taicpu.op_reg_reg_const_const(op: tasmop; _op1, _op2: tregister; _op3, _op4: aint);
+      begin
+        inherited create(op);
+        ops := 4;
+        loadreg(0, _op1);
+        loadreg(1, _op2);
+        loadconst(2, _op3);
+        loadconst(3, _op4);
+      end;
+
+
+    constructor taicpu.op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
+      begin
+         inherited create(op);
+         ops:=3;
+         loadreg(0,_op1);
+         loadreg(1,_op2);
+         loadreg(2,_op3);
+      end;
+
+     constructor taicpu.op_reg_reg_const(op : tasmop;_op1,_op2 : tregister; _op3: aint);
+       begin
+         inherited create(op);
+         ops:=3;
+         loadreg(0,_op1);
+         loadreg(1,_op2);
+         loadconst(2,_op3);
+      end;
+
+     constructor taicpu.op_reg_reg_sym_ofs(op : tasmop;_op1,_op2 : tregister; _op3: tasmsymbol;_op3ofs: aint);
+       begin
+         inherited create(op);
+         ops:=3;
+         loadreg(0,_op1);
+         loadreg(1,_op2);
+         loadsymbol(2,_op3,_op3ofs);
+      end;
+
+     constructor taicpu.op_reg_reg_ref(op : tasmop;_op1,_op2 : tregister; const _op3: treference);
+       begin
+         inherited create(op);
+         ops:=3;
+         loadreg(0,_op1);
+         loadreg(1,_op2);
+         loadref(2,_op3);
+      end;
+
+    constructor taicpu.op_const_reg_reg(op : tasmop;_op1 : aint;_op2, _op3 : tregister);
+      begin
+         inherited create(op);
+         ops:=3;
+         loadconst(0,_op1);
+         loadreg(1,_op2);
+         loadreg(2,_op3);
+      end;
+
+     constructor taicpu.op_const_reg_const(op : tasmop;_op1 : aint;_op2 : tregister;_op3 : aint);
+      begin
+         inherited create(op);
+         ops:=3;
+         loadconst(0,_op1);
+         loadreg(1,_op2);
+         loadconst(2,_op3);
+      end;
+
+
+     constructor taicpu.op_const_const_const(op : tasmop;_op1 : aint;_op2 : aint;_op3 : aint);
+      begin
+         inherited create(op);
+         ops:=3;
+         loadconst(0,_op1);
+         loadconst(1,_op2);
+         loadconst(2,_op3);
+      end;
+
+
+     constructor taicpu.op_reg_reg_reg_reg(op : tasmop;_op1,_op2,_op3,_op4 : tregister);
+      begin
+         inherited create(op);
+         ops:=4;
+         loadreg(0,_op1);
+         loadreg(1,_op2);
+         loadreg(2,_op3);
+         loadreg(3,_op4);
+      end;
+
+     constructor taicpu.op_reg_bool_reg_reg(op : tasmop;_op1: tregister;_op2:boolean;_op3,_op4:tregister);
+      begin
+         inherited create(op);
+         ops:=4;
+         loadreg(0,_op1);
+         loadbool(1,_op2);
+         loadreg(2,_op3);
+         loadreg(3,_op4);
+      end;
+
+     constructor taicpu.op_reg_bool_reg_const(op : tasmop;_op1: tregister;_op2:boolean;_op3:tregister;_op4: aint);
+      begin
+         inherited create(op);
+         ops:=4;
+         loadreg(0,_op1);
+         loadbool(0,_op2);
+         loadreg(0,_op3);
+         loadconst(0,cardinal(_op4));
+      end;
+
+     constructor taicpu.op_reg_reg_reg_const(op : tasmop; _op1, _op2, _op3 : tregister; _op4 : aint);
+      begin
+        inherited create(op);
+	ops := 4;
+	loadreg(0, _op1);
+	loadreg(1, _op2);
+	loadreg(2, _op3);
+	loadconst(3, cardinal(_op4));
+      end;
+
+     constructor taicpu.op_reg_reg_reg_const_const(op : tasmop;_op1,_op2,_op3 : tregister;_op4,_op5 : aint);
+      begin
+         inherited create(op);
+         ops:=5;
+         loadreg(0,_op1);
+         loadreg(1,_op2);
+         loadreg(2,_op3);
+         loadconst(3,cardinal(_op4));
+         loadconst(4,cardinal(_op5));
+      end;
+
+     constructor taicpu.op_reg_reg_const_const_const(op : tasmop;_op1,_op2 : tregister;_op3,_op4,_op5 : aint);
+      begin
+         inherited create(op);
+         ops:=5;
+         loadreg(0,_op1);
+         loadreg(1,_op2);
+         loadconst(2,_op3);
+         loadconst(3,_op4);
+         loadconst(4,_op5);
+      end;
+
+    constructor taicpu.op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
+      begin
+         inherited create(op);
+         condition:=cond;
+         ops:=1;
+         loadsymbol(0,_op1,0);
+      end;
+
+     constructor taicpu.op_const_const_sym(op : tasmop;_op1,_op2 : aint; _op3: tasmsymbol);
+      begin
+         inherited create(op);
+         ops:=3;
+         loadconst(0,_op1);
+         loadconst(1,_op2);
+         loadsymbol(2,_op3,0);
+      end;
+
+
+    constructor taicpu.op_sym(op : tasmop;_op1 : tasmsymbol);
+      begin
+         inherited create(op);
+         ops:=1;
+         loadsymbol(0,_op1,0);
+      end;
+
+
+    constructor taicpu.op_sym_ofs(op : tasmop;_op1 : tasmsymbol;_op1ofs:aint);
+      begin
+         inherited create(op);
+         ops:=1;
+         loadsymbol(0,_op1,_op1ofs);
+      end;
+
+    constructor taicpu.op_reg_sym(op: tasmop; _op1: tregister; _op2: tasmsymbol);
+      begin
+         inherited create(op);
+         ops:=2;
+         loadreg(0,_op1);
+         loadsymbol(1,_op2,0);
+      end;
+
+
+     constructor taicpu.op_reg_sym_ofs(op : tasmop;_op1 : tregister;_op2:tasmsymbol;_op2ofs : aint);
+      begin
+         inherited create(op);
+         ops:=2;
+         loadreg(0,_op1);
+         loadsymbol(1,_op2,_op2ofs);
+      end;
+
+
+    constructor taicpu.op_sym_ofs_ref(op : tasmop;_op1 : tasmsymbol;_op1ofs:aint;const _op2 : treference);
+      begin
+         inherited create(op);
+         ops:=2;
+         loadsymbol(0,_op1,_op1ofs);
+         loadref(1,_op2);
+      end;
+
+
+    function taicpu.is_same_reg_move(regtype: Tregistertype):boolean;
+      begin
+        case regtype of
+          R_INTREGISTER:
+            result:=
+               (opcode=A_MOV) and
+               (oper[0]^.reg=oper[1]^.reg);
+          R_FPUREGISTER:
+            result:=
+               (opcode=A_MOV_S) and
+               (oper[0]^.reg=oper[1]^.reg) and
+               (oper[0]^.reg=oper[2]^.reg);
+         else
+           result:=false;
+        end;
+      end;
+
+
+    function taicpu.spilling_get_operation_type(opnr: longint): topertype;
+      begin
+        result := operand_read;
+        case opcode of
+          A_MOV:
+            if opnr=0 then
+              result:=operand_write
+            else
+              result:=operand_read;
+          else
+            ;
+        end;
+      end;
+
+
+    function taicpu.spilling_get_operation_type_ref(opnr: longint; reg: tregister): topertype;
+      begin
+        result := operand_read;
+      end;
+
+
+    function spilling_create_load(const ref:treference;r:tregister):Taicpu;
+      begin
+        case getregtype(r) of
+          R_INTREGISTER:
+            result:=taicpu.op_reg_ref(A_L32I,r,ref);
+          R_FPUREGISTER:
+            result:=taicpu.op_reg_ref(A_LSI,r,ref);
+          else
+            internalerror(2020030701);
+        end;
+      end;
+
+
+    function spilling_create_store(r:tregister; const ref:treference):Taicpu;
+      begin
+        case getregtype(r) of
+          R_INTREGISTER:
+            result:=taicpu.op_reg_ref(A_S32I,r,ref);
+          R_FPUREGISTER:
+            result:=taicpu.op_reg_ref(A_SSI,r,ref);
+          else
+            internalerror(2020030701);
+        end;
+      end;
+
+
+    procedure InitAsm;
+      begin
+      end;
+
+
+    procedure DoneAsm;
+      begin
+      end;
+
+
+begin
+  cai_align:=tai_align;
+  cai_cpu:=taicpu;
+end.
+

+ 184 - 0
compiler/xtensa/agcpugas.pas

@@ -0,0 +1,184 @@
+{
+    Copyright (c) 2003 by Florian Klaempfl
+
+    This unit implements an asm for the Xtensa
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+{ This unit implements the GNU Assembler writer for the Xtensa
+}
+
+unit agcpugas;
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+       globtype,systems,
+       aasmtai,
+       assemble,aggas,
+       cpubase,cpuinfo;
+
+    type
+      TXtensaInstrWriter=class(TCPUInstrWriter)
+        procedure WriteInstruction(hp : tai);override;
+      end;
+
+      TXtensaGNUAssembler=class(TGNUassembler)
+        constructor CreateWithWriter(info: pasminfo; wr: TExternalAssemblerOutputFile; freewriter, smart: boolean); override;
+        function MakeCmdLine: TCmdStr; override;
+      end;
+
+  implementation
+
+    uses
+       cutils,globals,verbose,
+       aasmcpu,
+       itcpugas,
+       cgbase,cgutils;
+
+{****************************************************************************}
+{                         GNU Xtensa Assembler writer                           }
+{****************************************************************************}
+
+    constructor TXtensaGNUAssembler.CreateWithWriter(info: pasminfo; wr: TExternalAssemblerOutputFile; freewriter, smart: boolean);
+      begin
+        inherited;
+        InstrWriter := TXtensaInstrWriter.create(self);
+      end;
+
+
+    function TXtensaGNUAssembler.MakeCmdLine: TCmdStr;
+      begin
+        Result:='';
+      end;
+
+{****************************************************************************}
+{                  Helper routines for Instruction Writer                    }
+{****************************************************************************}
+
+    function getreferencestring(var ref : treference) : string;
+      var
+        s : string;
+      begin
+         with ref do
+          begin
+{$ifdef extdebug}
+            // if base=NR_NO then
+            //   internalerror(200308292);
+
+            // if ((index<>NR_NO) or (shiftmode<>SM_None)) and ((offset<>0) or (symbol<>nil)) then
+            //   internalerror(200308293);
+{$endif extdebug}
+
+            if assigned(symbol) then
+              begin
+                s:=symbol.name;
+                if offset<>0 then
+                  s:=s+tostr_with_plus(offset);
+                if refaddr=addr_pic then
+                  s:=s+'(PLT)'
+                {else if refaddr=addr_tlscall then
+                  s:=s+'(tlscall)'};
+              end
+            else
+              begin
+                s:='['+gas_regname(base);
+                if index<>NR_NO then
+                  begin
+                     s:=s+gas_regname(index);
+                  end
+                else if offset<>0 then
+                  s:=s+', #'+tostr(offset);
+              end;
+          end;
+        getreferencestring:=s;
+      end;
+
+
+    function getopstr(const o:toper) : string;
+      var
+        hs : string;
+        first : boolean;
+        r, rs : tsuperregister;
+      begin
+        case o.typ of
+          top_reg:
+            getopstr:=gas_regname(o.reg);
+          top_const:
+            getopstr:='#'+tostr(longint(o.val));
+          top_ref:
+            if o.ref^.refaddr=addr_full then
+              begin
+                hs:=o.ref^.symbol.name;
+                if o.ref^.offset>0 then
+                 hs:=hs+'+'+tostr(o.ref^.offset)
+                else
+                 if o.ref^.offset<0 then
+                  hs:=hs+tostr(o.ref^.offset);
+                getopstr:=hs;
+              end
+            else
+              getopstr:=getreferencestring(o.ref^);
+          else
+            internalerror(2020030705);
+        end;
+      end;
+
+
+    Procedure TXtensaInstrWriter.WriteInstruction(hp : tai);
+    var op: TAsmOp;
+        postfix,s: string;
+        i: byte;
+        sep: string[3];
+    begin
+      op:=taicpu(hp).opcode;
+      postfix:='';
+      s:=#9+gas_op2str[op];
+      if taicpu(hp).ops<>0 then
+        begin
+          sep:=#9;
+          for i:=0 to taicpu(hp).ops-1 do
+            begin
+               s:=s+sep+getopstr(taicpu(hp).oper[i]^);
+               sep:=',';
+            end;
+        end;
+      owner.writer.AsmWriteLn(s);
+    end;
+
+
+    const
+       as_xtensa_gas_info : tasminfo =
+          (
+            id     : as_gas;
+
+            idtxt  : 'AS';
+            asmbin : 'as';
+            asmcmd : '-o $OBJ $EXTRAOPT $ASM';
+            supported_targets : [system_xtensa_embedded];
+            flags : [af_needar,af_smartlink_sections,af_supports_dwarf,af_stabs_use_function_absolute_addresses];
+            labelprefix : '.L';
+            comment : '# ';
+            dollarsign: '$';
+          );
+
+
+begin
+  RegisterAssembler(as_xtensa_gas_info,TXtensaGNUAssembler);
+end.

+ 255 - 0
compiler/xtensa/aoptcpu.pas

@@ -0,0 +1,255 @@
+{
+    Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
+    Development Team
+
+    This unit implements the ARM64 optimizer object
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+
+Unit aoptcpu;
+
+{$i fpcdefs.inc}
+
+{ $define DEBUG_AOPTCPU}
+
+Interface
+
+    uses
+      globtype, globals,
+      cutils,
+      cgbase, cpubase, aasmtai, aasmcpu, aopt, aoptcpub;
+
+    Type
+      TCpuAsmOptimizer = class(TAsmOptimizer)
+        { uses the same constructor as TAopObj }
+        function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;override;
+        function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;override;
+        function GetNextInstructionUsingReg(Current : tai; out Next : tai; reg : TRegister) : Boolean;
+        procedure DebugMsg(const s : string; p : tai);
+      private
+        function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
+      End;
+
+Implementation
+
+  uses
+    aasmbase,
+    aoptutils,
+    cgutils,
+    verbose;
+
+{$ifdef DEBUG_AOPTCPU}
+  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
+    begin
+      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
+    end;
+{$else DEBUG_AOPTCPU}
+  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
+    begin
+    end;
+{$endif DEBUG_AOPTCPU}
+
+  function CanBeCond(p : tai) : boolean;
+    begin
+      result:=(p.typ=ait_instruction) and (taicpu(p).condition=C_None);
+    end;
+
+
+  function RefsEqual(const r1, r2: treference): boolean;
+    begin
+      refsequal :=
+        (r1.offset = r2.offset) and
+        (r1.base = r2.base) and
+        (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
+        (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
+        (r1.relsymbol = r2.relsymbol) and
+        (r1.volatility=[]) and
+        (r2.volatility=[]);
+    end;
+
+
+  function MatchInstruction(const instr: tai; const op: TAsmOp): boolean;
+    begin
+      result :=
+        (instr.typ = ait_instruction) and
+        (taicpu(instr).opcode = op);
+    end;
+
+
+  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
+    begin
+      result := (oper.typ = top_reg) and (oper.reg = reg);
+    end;
+
+
+  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
+    begin
+      result := oper1.typ = oper2.typ;
+
+      if result then
+        case oper1.typ of
+          top_const:
+            Result:=oper1.val = oper2.val;
+          top_reg:
+            Result:=oper1.reg = oper2.reg;
+          top_ref:
+            Result:=RefsEqual(oper1.ref^, oper2.ref^);
+          else Result:=false;
+        end
+    end;
+
+
+  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
+    Out Next: tai; reg: TRegister): Boolean;
+    begin
+      Next:=Current;
+      repeat
+        Result:=GetNextInstruction(Next,Next);
+      until not (Result) or
+            not(cs_opt_level3 in current_settings.optimizerswitches) or
+            (Next.typ<>ait_instruction) or
+            RegInInstruction(reg,Next) or
+            is_calljmp(taicpu(Next).opcode);
+    end;
+
+
+  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
+    var
+      p: taicpu;
+    begin
+      p := taicpu(hp);
+      Result := false;
+      if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
+        exit;
+    end;
+
+
+  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
+    var
+      p: taicpu;
+      i: longint;
+    begin
+      instructionLoadsFromReg := false;
+      if not (assigned(hp) and (hp.typ = ait_instruction)) then
+        exit;
+      p:=taicpu(hp);
+
+      i:=1;
+
+      { Start on oper[0]? }
+      if taicpu(hp).spilling_get_operation_type(0) in [operand_read, operand_readwrite] then
+        i:=0;
+
+      while(i<p.ops) do
+        begin
+          case p.oper[I]^.typ of
+            top_reg:
+              Result := (p.oper[I]^.reg = reg);
+            top_ref:
+              Result :=
+                (p.oper[I]^.ref^.base = reg) or
+                (p.oper[I]^.ref^.index = reg);
+            else
+              ;
+          end;
+          { Bailout if we found something }
+          if Result then
+            exit;
+          Inc(I);
+        end;
+    end;
+
+
+  function TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
+    var
+      alloc,
+      dealloc : tai_regalloc;
+      hp1 : tai;
+    begin
+      Result:=false;
+      if MatchInstruction(movp, A_MOV) and
+        (taicpu(p).ops>=3) and
+        { We can't optimize if there is a shiftop }
+        (taicpu(movp).ops=2) and
+        MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
+       { the destination register of the mov might not be used beween p and movp }
+        not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
+        { Take care to only do this for instructions which REALLY load to the first register.
+          Otherwise
+            str reg0, [reg1]
+            mov reg2, reg0
+          will be optimized to
+            str reg2, [reg1]
+        }
+        RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
+        begin
+          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
+          if assigned(dealloc) then
+            begin
+              DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
+              result:=true;
+
+              { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
+                and remove it if possible }
+              asml.Remove(dealloc);
+              alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
+              if assigned(alloc) then
+                begin
+                  asml.Remove(alloc);
+                  alloc.free;
+                  dealloc.free;
+                end
+              else
+                asml.InsertAfter(dealloc,p);
+
+              { try to move the allocation of the target register }
+              GetLastInstruction(movp,hp1);
+              alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
+              if assigned(alloc) then
+                begin
+                  asml.Remove(alloc);
+                  asml.InsertBefore(alloc,p);
+                  { adjust used regs }
+                  IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
+                end;
+
+              { finally get rid of the mov }
+              taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
+              { Remove preindexing and postindexing for LDR in some cases.
+                For example:
+                  ldr	reg2,[reg1, xxx]!
+                  mov reg1,reg2
+                must be translated to:
+                  ldr	reg1,[reg1, xxx]
+
+                Preindexing must be removed there, since the same register is used as the base and as the target.
+                Such case is not allowed for ARM CPU and produces crash. }
+              //if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref)
+              //  and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base)
+              //then
+              //  taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;
+              asml.remove(movp);
+              movp.free;
+            end;
+        end;
+    end;
+
+begin
+  casmoptimizer:=TCpuAsmOptimizer;
+End.
+

+ 150 - 0
compiler/xtensa/aoptcpub.pas

@@ -0,0 +1,150 @@
+ {
+    Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
+    Development Team
+
+    This unit contains several types and constants necessary for the
+    optimizer to work on the ARM64 architecture
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+Unit aoptcpub; { Assembler OPTimizer CPU specific Base }
+
+{$i fpcdefs.inc}
+
+{ enable the following define if memory references can have both a base and }
+{ index register in 1 operand                                               }
+
+{ enable the following define if memory references can have a scaled index }
+{ define RefsHaveScale}
+
+{ enable the following define if memory references can have a segment }
+{ override                                                            }
+
+{ define RefsHaveSegment}
+
+Interface
+
+Uses
+  cgbase,aasmtai,
+  cpubase,aasmcpu,AOptBase;
+
+Type
+
+{ type of a normal instruction }
+  TInstr = Taicpu;
+  PInstr = ^TInstr;
+
+{ ************************************************************************* }
+{ **************************** TCondRegs ********************************** }
+{ ************************************************************************* }
+{ Info about the conditional registers                                      }
+  TCondRegs = Object
+    Constructor Init;
+    Destructor Done;
+  End;
+
+{ ************************************************************************* }
+{ **************************** TAoptBaseCpu ******************************* }
+{ ************************************************************************* }
+
+  TAoptBaseCpu = class(TAoptBase)
+    function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;
+  End;
+
+
+{ ************************************************************************* }
+{ ******************************* Constants ******************************* }
+{ ************************************************************************* }
+Const
+
+{ the maximum number of things (registers, memory, ...) a single instruction }
+{ changes                                                                    }
+
+  MaxCh = 3;
+
+{Oper index of operand that contains the source (reference) with a load }
+{instruction                                                            }
+
+  LoadSrc = 0;
+
+{Oper index of operand that contains the destination (register) with a load }
+{instruction                                                                }
+
+  LoadDst = 1;
+
+{Oper index of operand that contains the source (register) with a store }
+{instruction                                                            }
+
+  StoreSrc = 0;
+
+{Oper index of operand that contains the destination (reference) with a load }
+{instruction                                                                 }
+
+  StoreDst = 1;
+
+  aopt_uncondjmp = A_J;
+  aopt_condjmp = A_Bcc;
+
+Implementation
+
+{ ************************************************************************* }
+{ **************************** TCondRegs ********************************** }
+{ ************************************************************************* }
+  Constructor TCondRegs.init;
+    Begin
+    End;
+
+
+  Destructor TCondRegs.Done; {$ifdef inl} inline; {$endif inl}
+    Begin
+    End;
+
+
+  function TAoptBaseCpu.RegModifiedByInstruction(reg: tregister; p1: tai): boolean;
+    var
+      i: longint;
+      preg: tregister;
+    begin
+      result:=false;
+      for i:=0 to taicpu(p1).ops-1 do
+        case taicpu(p1).oper[i]^.typ of
+          top_reg:
+            begin
+              preg:=taicpu(p1).oper[i]^.reg;
+              if (getregtype(preg)=getregtype(reg)) and
+                 (getsupreg(preg)=getsupreg(reg)) and
+                 (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
+                begin
+                  result:=true;
+                  exit;
+                end;
+            end;
+          //top_ref:
+          //  begin
+          //    if (taicpu(p1).oper[i]^.ref^.addressmode<>am_offset) and
+          //       (reg=taicpu(p1).oper[i]^.ref^.base) then
+          //      begin
+          //        result:=true;
+          //        exit
+          //      end;
+          //  end;
+          else
+            ;
+        end;
+    end;
+
+End.

+ 2326 - 0
compiler/xtensa/cgcpu.pas

@@ -0,0 +1,2326 @@
+{
+    Copyright (c) 2014 by Jonas Maebe
+
+    This unit implements the code generator for Xtensa
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit cgcpu;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+       globtype,parabase,
+       cgbase,cgutils,cgobj,
+       aasmbase,aasmtai,aasmdata,aasmcpu,
+       cpubase,cpuinfo,
+       node,symconst,SymType,symdef,
+       rgcpu;
+
+    type
+      tcgcpu=class(tcg)
+{$ifdef dummy}
+       protected
+        { changes register size without adding register allocation info }
+        function makeregsize(reg: tregister; size: tcgsize): tregister; overload;
+       public
+        { simplifies "ref" so it can be used with "op". If "ref" can be used
+          with a different load/Store operation that has the same meaning as the
+          original one, "op" will be replaced with the alternative }
+        procedure make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
+        function getfpuregister(list: TAsmList; size: Tcgsize): Tregister; override;
+        procedure handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
+        procedure init_register_allocators;override;
+        procedure done_register_allocators;override;
+        function  getmmregister(list:TAsmList;size:tcgsize):tregister;override;
+        function handle_load_store(list:TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
+        procedure a_call_name(list:TAsmList;const s:string; weak: boolean);override;
+        procedure a_call_reg(list:TAsmList;Reg:tregister);override;
+        { General purpose instructions }
+        procedure maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
+        procedure a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);override;
+        procedure a_op_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src, dst: tregister);override;
+        procedure a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);override;
+        procedure a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);override;
+        procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
+        procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
+        { move instructions }
+        procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;
+        procedure a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference); override;
+        procedure a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister;const ref: TReference);override;
+        procedure a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference); override;
+        procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister);override;
+        procedure a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister); override;
+        procedure a_load_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);override;
+        procedure a_loadaddr_ref_reg(list: TAsmList; const ref: TReference; r: tregister);override;
+        { fpu move instructions (not used, all floating point is vector unit-based) }
+        procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
+        procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
+        procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
+        procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister;shuffle : pmmshuffle);override;
+        procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister; shuffle: pmmshuffle);override;
+        procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: TReference; shuffle: pmmshuffle);override;
+
+        procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle); override;
+        procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle); override;
+
+        procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle); override;
+
+        procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override;
+        { comparison operations }
+        procedure a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);override;
+        procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);override;
+        procedure a_jmp_always(list: TAsmList; l: TAsmLabel);override;
+        procedure a_jmp_name(list: TAsmList; const s: string);override;
+        procedure a_jmp_cond(list: TAsmList; cond: TOpCmp; l: tasmlabel);{ override;}
+        procedure a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);override;
+        procedure g_flags2reg(list: TAsmList; size: tcgsize; const f:tresflags; reg: tregister);override;
+        procedure g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);override;
+        procedure g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc: tlocation);override;
+        procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
+        procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);override;
+        procedure g_maybe_got_init(list: TAsmList); override;
+        procedure g_restore_registers(list: TAsmList);override;
+        procedure g_save_registers(list: TAsmList);override;
+        procedure g_concatcopy_move(list: TAsmList; const source, dest: treference; len: tcgint);
+        procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);override;
+        procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint);override;
+        procedure g_check_for_fpu_exception(list: TAsmList; force, clear: boolean);override;
+        procedure g_profilecode(list: TAsmList);override;
+       private
+        function save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
+        procedure load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
+{$endif dummy}
+      end;
+
+    procedure create_codegen;
+{
+    const
+      TOpCG2AsmOpReg: array[topcg] of TAsmOp = (
+        A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASRV,A_LSLV,A_LSRV,A_SUB,A_EOR,A_NONE,A_RORV
+      );
+      TOpCG2AsmOpImm: array[topcg] of TAsmOp = (
+        A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR
+      );
+      TOpCmp2AsmCond: array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
+        C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI
+      );
+ }
+
+implementation
+
+  uses
+    globals,verbose,systems,cutils,
+    paramgr,fmodule,
+    symtable,symsym,
+    tgobj,
+    procinfo,cpupi,
+    cg64f32;
+
+ {$ifdef dummy}
+    procedure tcgaarch64.make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
+      var
+        href: treference;
+        so: tshifterop;
+        accesssize: longint;
+      begin
+        if (ref.base=NR_NO) then
+          begin
+            if ref.shiftmode<>SM_None then
+              internalerror(2014110701);
+            ref.base:=ref.index;
+            ref.index:=NR_NO;
+          end;
+        { no abitrary scale factor support (the generic code doesn't set it,
+          AArch-specific code shouldn't either) }
+        if not(ref.scalefactor in [0,1]) then
+          internalerror(2014111002);
+
+        case simple_ref_type(op,size,oppostfix,ref) of
+          sr_simple:
+            exit;
+          sr_internal_illegal:
+            internalerror(2014121702);
+          sr_complex:
+            { continue } ;
+        end;
+
+        if assigned(ref.symbol) then
+          begin
+            { internal "load symbol" instructions should already be valid }
+            if assigned(ref.symboldata) or
+               (ref.refaddr in [addr_pic,addr_gotpage,addr_gotpageoffset,addr_page,addr_pageoffset]) then
+              internalerror(2014110802);
+            { no relative symbol support (needed) yet }
+            if assigned(ref.relsymbol) then
+              internalerror(2014111001);
+            { loading a symbol address (whether it's in the GOT or not) consists
+              of two parts: first load the page on which it is located, then
+              either the offset in the page or load the value at that offset in
+              the page. This final GOT-load can be relaxed by the linker in case
+              the variable itself can be stored directly in the GOT }
+            if (preferred_newbasereg=NR_NO) or
+               (ref.base=preferred_newbasereg) or
+               (ref.index=preferred_newbasereg) then
+              preferred_newbasereg:=getaddressregister(list);
+            { load the (GOT) page }
+            reference_reset_symbol(href,ref.symbol,0,8,[]);
+            if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
+                (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
+               ((ref.symbol.typ=AT_DATA) and
+                (ref.symbol.bind=AB_LOCAL)) then
+              href.refaddr:=addr_page
+            else
+              href.refaddr:=addr_gotpage;
+            list.concat(taicpu.op_reg_ref(A_ADRP,preferred_newbasereg,href));
+            { load the GOT entry (= address of the variable) }
+            reference_reset_base(href,preferred_newbasereg,0,ctempposinvalid,sizeof(pint),[]);
+            href.symbol:=ref.symbol;
+            { code symbols defined in the current compilation unit do not
+              have to be accessed via the GOT }
+            if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
+                (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
+               ((ref.symbol.typ=AT_DATA) and
+                (ref.symbol.bind=AB_LOCAL)) then
+              begin
+                href.base:=NR_NO;
+                href.refaddr:=addr_pageoffset;
+                list.concat(taicpu.op_reg_reg_ref(A_ADD,preferred_newbasereg,preferred_newbasereg,href));
+              end
+            else
+              begin
+                href.refaddr:=addr_gotpageoffset;
+                { use a_load_ref_reg() rather than directly encoding the LDR,
+                  so that we'll check the validity of the reference }
+                a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,preferred_newbasereg);
+              end;
+            { set as new base register }
+            if ref.base=NR_NO then
+              ref.base:=preferred_newbasereg
+            else if ref.index=NR_NO then
+              ref.index:=preferred_newbasereg
+            else
+              begin
+                { make sure it's valid in case ref.base is SP -> make it
+                  the second operand}
+                a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,preferred_newbasereg,ref.base,preferred_newbasereg);
+                ref.base:=preferred_newbasereg
+              end;
+            ref.symbol:=nil;
+          end;
+
+        { base & index }
+        if (ref.base<>NR_NO) and
+           (ref.index<>NR_NO) then
+          begin
+            case op of
+              A_LDR, A_STR:
+                begin
+                  if (ref.shiftmode=SM_None) and
+                     (ref.shiftimm<>0) then
+                    internalerror(2014110805);
+                  { wrong shift? (possible in case of something like
+                     array_of_2byte_rec[x].bytefield -> shift will be set 1, but
+                     the final load is a 1 byte -> can't use shift after all }
+                  if (ref.shiftmode in [SM_LSL,SM_UXTW,SM_SXTW]) and
+                     ((ref.shiftimm<>BsfDWord(tcgsizep2size[size])) or
+                      (ref.offset<>0)) then
+                    begin
+                      if preferred_newbasereg=NR_NO then
+                        preferred_newbasereg:=getaddressregister(list);
+                      { "add" supports a superset of the shift modes supported by
+                        load/store instructions }
+                      shifterop_reset(so);
+                      so.shiftmode:=ref.shiftmode;
+                      so.shiftimm:=ref.shiftimm;
+                      list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
+                      reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
+                      { possibly still an invalid offset -> fall through }
+                    end
+                  else if ref.offset<>0 then
+                    begin
+                      if (preferred_newbasereg=NR_NO) or
+                         { we keep ref.index, so it must not be overwritten }
+                         (ref.index=preferred_newbasereg) then
+                        preferred_newbasereg:=getaddressregister(list);
+                      { add to the base and not to the index, because the index
+                        may be scaled; this works even if the base is SP }
+                      a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
+                      ref.offset:=0;
+                      ref.base:=preferred_newbasereg;
+                      { finished }
+                      exit;
+                    end
+                  else
+                    { valid -> exit }
+                    exit;
+                end;
+              { todo }
+              A_LD1,A_LD2,A_LD3,A_LD4,
+              A_ST1,A_ST2,A_ST3,A_ST4:
+                internalerror(2014110704);
+              { these don't support base+index }
+              A_LDUR,A_STUR,
+              A_LDP,A_STP:
+                begin
+                  { these either don't support pre-/post-indexing, or don't
+                    support it with base+index }
+                  if ref.addressmode<>AM_OFFSET then
+                    internalerror(2014110911);
+                  if preferred_newbasereg=NR_NO then
+                    preferred_newbasereg:=getaddressregister(list);
+                  if ref.shiftmode<>SM_None then
+                    begin
+                      { "add" supports a superset of the shift modes supported by
+                        load/store instructions }
+                      shifterop_reset(so);
+                      so.shiftmode:=ref.shiftmode;
+                      so.shiftimm:=ref.shiftimm;
+                      list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
+                    end
+                  else
+                    a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,ref.index,ref.base,preferred_newbasereg);
+                  reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
+                  { fall through to the handling of base + offset, since the
+                    offset may still be too big }
+                end;
+              else
+                internalerror(2014110901);
+            end;
+          end;
+
+        { base + offset }
+        if ref.base<>NR_NO then
+          begin
+            { valid offset for LDUR/STUR -> use that }
+            if (ref.addressmode=AM_OFFSET) and
+               (op in [A_LDR,A_STR]) and
+               (ref.offset>=-256) and
+               (ref.offset<=255) then
+              begin
+                if op=A_LDR then
+                  op:=A_LDUR
+                else
+                  op:=A_STUR
+              end
+            { if it's not a valid LDUR/STUR, use LDR/STR }
+            else if (op in [A_LDUR,A_STUR]) and
+               ((ref.offset<-256) or
+                (ref.offset>255) or
+                (ref.addressmode<>AM_OFFSET)) then
+              begin
+                if op=A_LDUR then
+                  op:=A_LDR
+                else
+                  op:=A_STR
+              end;
+            case op of
+              A_LDR,A_STR:
+                begin
+                  case ref.addressmode of
+                    AM_PREINDEXED:
+                      begin
+                        { since the loaded/stored register cannot be the same
+                          as the base register, we can safely add the
+                          offset to the base if it doesn't fit}
+                        if (ref.offset<-256) or
+                            (ref.offset>255) then
+                          begin
+                            a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base);
+                            ref.offset:=0;
+                          end;
+                      end;
+                    AM_POSTINDEXED:
+                      begin
+                        { cannot emulate post-indexing if we have to fold the
+                          offset into the base register }
+                        if (ref.offset<-256) or
+                            (ref.offset>255) then
+                          internalerror(2014110909);
+                        { ok }
+                      end;
+                    AM_OFFSET:
+                      begin
+                        { unsupported offset -> fold into base register }
+                        accesssize:=1 shl tcgsizep2size[size];
+                        if (ref.offset<0) or
+                           (ref.offset>(((1 shl 12)-1)*accesssize)) or
+                           ((ref.offset mod accesssize)<>0) then
+                          begin
+                            if preferred_newbasereg=NR_NO then
+                              preferred_newbasereg:=getaddressregister(list);
+                            { can we split the offset beween an
+                              "add/sub (imm12 shl 12)" and the load (also an
+                              imm12)?
+                              -- the offset from the load will always be added,
+                                that's why the lower bound has a smaller range
+                                than the upper bound; it must also be a multiple
+                                of the access size }
+                            if (ref.offset>=-(((1 shl 12)-1) shl 12)) and
+                               (ref.offset<=((1 shl 12)-1) shl 12 + ((1 shl 12)-1)) and
+                               ((ref.offset mod accesssize)=0) then
+                              begin
+                                a_op_const_reg_reg(list,OP_ADD,OS_ADDR,(ref.offset shr 12) shl 12,ref.base,preferred_newbasereg);
+                                ref.offset:=ref.offset-(ref.offset shr 12) shl 12;
+                              end
+                            else
+                              begin
+                                a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
+                                ref.offset:=0;
+                              end;
+                            reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
+                          end;
+                      end
+                  end;
+                end;
+              A_LDP,A_STP:
+                begin
+                  { unsupported offset -> fold into base register (these
+                    instructions support all addressmodes) }
+                  if (ref.offset<-(1 shl (6+tcgsizep2size[size]))) or
+                     (ref.offset>(1 shl (6+tcgsizep2size[size]))-1) then
+                    begin
+                      case ref.addressmode of
+                        AM_POSTINDEXED:
+                          { don't emulate post-indexing if we have to fold the
+                            offset into the base register }
+                          internalerror(2014110910);
+                        AM_PREINDEXED:
+                          { this means the offset must be added to the current
+                            base register }
+                          preferred_newbasereg:=ref.base;
+                        AM_OFFSET:
+                          if preferred_newbasereg=NR_NO then
+                            preferred_newbasereg:=getaddressregister(list);
+                      end;
+                      a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
+                      reference_reset_base(ref,preferred_newbasereg,0,ref.temppos,ref.alignment,ref.volatility);
+                    end
+                end;
+              A_LDUR,A_STUR:
+                begin
+                  { valid, checked above }
+                end;
+              { todo }
+              A_LD1,A_LD2,A_LD3,A_LD4,
+              A_ST1,A_ST2,A_ST3,A_ST4:
+                internalerror(2014110908);
+              else
+                internalerror(2014110708);
+            end;
+            { done }
+            exit;
+          end;
+
+        { only an offset -> change to base (+ offset 0) }
+        if preferred_newbasereg=NR_NO then
+          preferred_newbasereg:=getaddressregister(list);
+        a_load_const_reg(list,OS_ADDR,ref.offset,preferred_newbasereg);
+        reference_reset_base(ref,preferred_newbasereg,0,ref.temppos,newalignment(8,ref.offset),ref.volatility);
+      end;
+
+
+    function tcgaarch64.makeregsize(reg: tregister; size: tcgsize): tregister;
+      var
+        subreg:Tsubregister;
+      begin
+        subreg:=cgsize2subreg(getregtype(reg),size);
+        result:=reg;
+        setsubreg(result,subreg);
+      end;
+
+
+    function tcgaarch64.getfpuregister(list: TAsmList; size: Tcgsize): Tregister;
+      begin
+        internalerror(2014122110);
+        { squash warning }
+        result:=NR_NO;
+      end;
+
+
+    function tcgaarch64.handle_load_store(list: TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
+      begin
+        make_simple_ref(list,op,size,oppostfix,ref,NR_NO);
+        list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
+        result:=ref;
+      end;
+
+
+    procedure tcgaarch64.handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
+      var
+        instr: taicpu;
+        so: tshifterop;
+        hadtmpreg: boolean;
+      begin
+        { imm12 }
+        if (a>=0) and
+           (a<=((1 shl 12)-1)) then
+          if usedest then
+            instr:=taicpu.op_reg_reg_const(op,dst,src,a)
+          else
+            instr:=taicpu.op_reg_const(op,src,a)
+        { imm12 lsl 12 }
+        else if (a and not(((tcgint(1) shl 12)-1) shl 12))=0 then
+          begin
+            so.shiftmode:=SM_LSL;
+            so.shiftimm:=12;
+            if usedest then
+              instr:=taicpu.op_reg_reg_const_shifterop(op,dst,src,a shr 12,so)
+            else
+              instr:=taicpu.op_reg_const_shifterop(op,src,a shr 12,so)
+          end
+        else
+          begin
+            { todo: other possible optimizations (e.g. load 16 bit constant in
+                register and then add/sub/cmp/cmn shifted the rest) }
+            if tmpreg=NR_NO then
+              begin
+                hadtmpreg:=false;
+                tmpreg:=getintregister(list,size);
+              end
+            else
+              begin
+                hadtmpreg:=true;
+                getcpuregister(list,tmpreg);
+              end;
+            a_load_const_reg(list,size,a,tmpreg);
+            if usedest then
+              instr:=taicpu.op_reg_reg_reg(op,dst,src,tmpreg)
+            else
+              instr:=taicpu.op_reg_reg(op,src,tmpreg);
+            if hadtmpreg then
+              ungetcpuregister(list,tmpreg);
+          end;
+        if setflags then
+          setoppostfix(instr,PF_S);
+        list.concat(instr);
+      end;
+
+
+{****************************************************************************
+                              Assembler code
+****************************************************************************}
+
+    procedure tcgaarch64.init_register_allocators;
+      begin
+        inherited init_register_allocators;
+
+        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
+            [RS_X0,RS_X1,RS_X2,RS_X3,RS_X4,RS_X5,RS_X6,RS_X7,RS_X8,
+             RS_X9,RS_X10,RS_X11,RS_X12,RS_X13,RS_X14,RS_X15,RS_X16,RS_X17,
+             RS_X19,RS_X20,RS_X21,RS_X22,RS_X23,RS_X24,RS_X25,RS_X26,RS_X27,RS_X28
+             { maybe we can enable this in the future for leaf functions (it's
+               the frame pointer)
+             ,RS_X29 }],
+            first_int_imreg,[]);
+
+        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBMMD,
+            [RS_Q0,RS_Q1,RS_Q2,RS_Q3,RS_Q4,RS_Q5,RS_Q6,RS_Q7,
+             RS_Q8,RS_Q9,RS_Q10,RS_Q11,RS_Q12,RS_Q13,RS_Q14,RS_Q15,
+             RS_Q16,RS_Q17,RS_Q18,RS_Q19,RS_Q20,RS_Q21,RS_Q22,RS_Q23,
+             RS_Q24,RS_Q25,RS_Q26,RS_Q27,RS_Q28,RS_Q29,RS_Q30,RS_Q31],
+            first_mm_imreg,[]);
+      end;
+
+
+    procedure tcgaarch64.done_register_allocators;
+      begin
+        rg[R_INTREGISTER].free;
+        rg[R_FPUREGISTER].free;
+        rg[R_MMREGISTER].free;
+        inherited done_register_allocators;
+      end;
+
+
+    function tcgaarch64.getmmregister(list: TAsmList; size: tcgsize):tregister;
+      begin
+        case size of
+          OS_F32:
+            result:=rg[R_MMREGISTER].getregister(list,R_SUBMMS);
+          OS_F64:
+            result:=rg[R_MMREGISTER].getregister(list,R_SUBMMD)
+          else
+            internalerror(2014102701);
+        end;
+      end;
+
+
+    procedure tcgaarch64.a_call_name(list: TAsmList; const s: string; weak: boolean);
+      begin
+        if not weak then
+          list.concat(taicpu.op_sym(A_BL,current_asmdata.RefAsmSymbol(s,AT_FUNCTION)))
+        else
+          list.concat(taicpu.op_sym(A_BL,current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)));
+      end;
+
+
+    procedure tcgaarch64.a_call_reg(list:TAsmList;Reg:tregister);
+      begin
+        list.concat(taicpu.op_reg(A_BLR,reg));
+      end;
+
+
+    {********************** load instructions ********************}
+
+    procedure tcgaarch64.a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg : tregister);
+      var
+        preva: tcgint;
+        opc: tasmop;
+        shift,maxshift: byte;
+        so: tshifterop;
+        reginited: boolean;
+        mask: tcgint;
+      begin
+        { if we load a value into a 32 bit register, it is automatically
+          zero-extended to 64 bit }
+        if (hi(a)=0) and
+           (size in [OS_64,OS_S64]) then
+          begin
+            size:=OS_32;
+            reg:=makeregsize(reg,size);
+          end;
+        { values <= 32 bit are stored in a 32 bit register }
+        if not(size in [OS_64,OS_S64]) then
+          a:=cardinal(a);
+
+        if size in [OS_64,OS_S64] then
+          begin
+            mask:=-1;
+            maxshift:=64;
+          end
+        else
+          begin
+            mask:=$ffffffff;
+            maxshift:=32;
+          end;
+        { single movn enough? (to be extended) }
+        shift:=16;
+        preva:=a;
+        repeat
+          if (a shr shift)=(mask shr shift) then
+            begin
+              if shift=16 then
+                list.concat(taicpu.op_reg_const(A_MOVN,reg,not(word(preva))))
+              else
+                begin
+                  shifterop_reset(so);
+                  so.shiftmode:=SM_LSL;
+                  so.shiftimm:=shift-16;
+                  list.concat(taicpu.op_reg_const_shifterop(A_MOVN,reg,not(word(preva)),so));
+                end;
+              exit;
+            end;
+          { only try the next 16 bits if the current one is all 1 bits, since
+            the movn will set all lower bits to 1 }
+          if word(a shr (shift-16))<>$ffff then
+            break;
+          inc(shift,16);
+        until shift=maxshift;
+        reginited:=false;
+        shift:=0;
+        { can be optimized later to use more movn }
+        repeat
+          { leftover is shifterconst? (don't check if we can represent it just
+            as effectively with movz/movk, as this check is expensive) }
+          if ((shift<tcgsize2size[size]*(8 div 2)) and
+              (word(a)<>0) and
+              ((a shr 16)<>0)) and
+             is_shifter_const(a shl shift,size) then
+            begin
+              if reginited then
+                list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,a shl shift))
+              else
+                list.concat(taicpu.op_reg_reg_const(A_ORR,reg,makeregsize(NR_XZR,size),a shl shift));
+              exit;
+            end;
+          { set all 16 bit parts <> 0 }
+          if (word(a)<>0) or
+             ((shift=0) and
+              (a=0)) then
+            if shift=0 then
+              begin
+                list.concat(taicpu.op_reg_const(A_MOVZ,reg,word(a)));
+                reginited:=true;
+              end
+            else
+              begin
+                shifterop_reset(so);
+                so.shiftmode:=SM_LSL;
+                so.shiftimm:=shift;
+                if not reginited then
+                  begin
+                    opc:=A_MOVZ;
+                    reginited:=true;
+                  end
+                else
+                  opc:=A_MOVK;
+                list.concat(taicpu.op_reg_const_shifterop(opc,reg,word(a),so));
+              end;
+            preva:=a;
+            a:=a shr 16;
+           inc(shift,16);
+        until word(preva)=preva;
+        if not reginited then
+          internalerror(2014102702);
+      end;
+
+
+    procedure tcgaarch64.a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference);
+      var
+        reg: tregister;
+      begin
+        { use the zero register if possible }
+        if a=0 then
+          begin
+            if size in [OS_64,OS_S64] then
+              reg:=NR_XZR
+            else
+              reg:=NR_WZR;
+            a_load_reg_ref(list,size,size,reg,ref);
+          end
+        else
+          inherited;
+      end;
+
+
+    procedure tcgaarch64.a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
+      var
+        oppostfix:toppostfix;
+        hreg: tregister;
+      begin
+        if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
+          begin
+            fromsize:=tosize;
+            reg:=makeregsize(list,reg,fromsize);
+          end
+        { have a 32 bit register but need a 64 bit one? }
+        else if tosize in [OS_64,OS_S64] then
+          begin
+            { sign extend if necessary }
+            if fromsize in [OS_S8,OS_S16,OS_S32] then
+              begin
+                { can't overwrite reg, may be a constant reg }
+                hreg:=getintregister(list,tosize);
+                a_load_reg_reg(list,fromsize,tosize,reg,hreg);
+                reg:=hreg;
+              end
+            else
+              { top 32 bit are zero by default }
+              reg:=makeregsize(reg,OS_64);
+            fromsize:=tosize;
+          end;
+        if (ref.alignment<>0) and
+           (ref.alignment<tcgsize2size[tosize]) then
+          begin
+            a_load_reg_ref_unaligned(list,fromsize,tosize,reg,ref);
+          end
+        else
+          begin
+            case tosize of
+              { signed integer registers }
+              OS_8,
+              OS_S8:
+                oppostfix:=PF_B;
+              OS_16,
+              OS_S16:
+                oppostfix:=PF_H;
+              OS_32,
+              OS_S32,
+              OS_64,
+              OS_S64:
+                oppostfix:=PF_None;
+              else
+                InternalError(200308299);
+            end;
+            handle_load_store(list,A_STR,tosize,oppostfix,reg,ref);
+          end;
+      end;
+
+
+    procedure tcgaarch64.a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
+      var
+        oppostfix:toppostfix;
+      begin
+        if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
+          fromsize:=tosize;
+        { ensure that all bits of the 32/64 register are always correctly set:
+           * default behaviour is always to zero-extend to the entire (64 bit)
+             register -> unsigned 8/16/32 bit loads only exist with a 32 bit
+             target register, as the upper 32 bit will be zeroed implicitly
+             -> always make target register 32 bit
+           * signed loads exist both with 32 and 64 bit target registers,
+             depending on whether the value should be sign extended to 32 or
+             to 64 bit (if sign extended to 32 bit, the upper 32 bits of the
+             corresponding 64 bit register are again zeroed) -> no need to
+             change anything (we only have 32 and 64 bit registers), except that
+             when loading an OS_S32 to a 32 bit register, we don't need/can't
+             use sign extension
+        }
+        if fromsize in [OS_8,OS_16,OS_32] then
+          reg:=makeregsize(reg,OS_32);
+        if (ref.alignment<>0) and
+           (ref.alignment<tcgsize2size[fromsize]) then
+          begin
+            a_load_ref_reg_unaligned(list,fromsize,tosize,ref,reg);
+            exit;
+          end;
+        case fromsize of
+          { signed integer registers }
+          OS_8:
+            oppostfix:=PF_B;
+          OS_S8:
+            oppostfix:=PF_SB;
+          OS_16:
+            oppostfix:=PF_H;
+          OS_S16:
+            oppostfix:=PF_SH;
+          OS_S32:
+            if getsubreg(reg)=R_SUBD then
+              oppostfix:=PF_NONE
+            else
+              oppostfix:=PF_SW;
+          OS_32,
+          OS_64,
+          OS_S64:
+            oppostfix:=PF_None;
+          else
+            InternalError(200308297);
+        end;
+        handle_load_store(list,A_LDR,fromsize,oppostfix,reg,ref);
+
+        { clear upper 16 bits if the value was negative }
+        if (fromsize=OS_S8) and (tosize=OS_16) then
+          a_load_reg_reg(list,fromsize,tosize,reg,reg);
+      end;
+
+
+    procedure tcgaarch64.a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister);
+      var
+        href: treference;
+        hreg1, hreg2, tmpreg,tmpreg2: tregister;
+        i : Integer;
+      begin
+        case fromsize of
+          OS_64,OS_S64:
+            begin
+              { split into two 32 bit loads }
+              hreg1:=getintregister(list,OS_32);
+              hreg2:=getintregister(list,OS_32);
+              if target_info.endian=endian_big then
+                begin
+                  tmpreg:=hreg1;
+                  hreg1:=hreg2;
+                  hreg2:=tmpreg;
+                end;
+              { can we use LDP? }
+              if (ref.alignment=4) and
+                 (simple_ref_type(A_LDP,OS_32,PF_None,ref)=sr_simple) then
+                list.concat(taicpu.op_reg_reg_ref(A_LDP,hreg1,hreg2,ref))
+              else
+                begin
+                  a_load_ref_reg(list,OS_32,OS_32,ref,hreg1);
+                  href:=ref;
+                  inc(href.offset,4);
+                  a_load_ref_reg(list,OS_32,OS_32,href,hreg2);
+                end;
+              a_load_reg_reg(list,OS_32,OS_64,hreg1,register);
+              list.concat(taicpu.op_reg_reg_const_const(A_BFI,register,makeregsize(hreg2,OS_64),32,32));
+            end;
+          OS_16,OS_S16,
+          OS_32,OS_S32:
+            begin
+              if ref.alignment=2 then
+                begin
+                  href:=ref;
+                  if target_info.endian=endian_big then
+                    inc(href.offset,tcgsize2size[fromsize]-2);
+                  tmpreg:=getintregister(list,OS_32);
+                  a_load_ref_reg(list,OS_16,OS_32,href,tmpreg);
+                  tmpreg2:=getintregister(list,OS_32);
+                  for i:=1 to (tcgsize2size[fromsize]-1) div 2 do
+                    begin
+                      if target_info.endian=endian_big then
+                        dec(href.offset,2)
+                      else
+                        inc(href.offset,2);
+                      a_load_ref_reg(list,OS_16,OS_32,href,tmpreg2);
+                      list.concat(taicpu.op_reg_reg_const_const(A_BFI,tmpreg,tmpreg2,i*16,16));
+                    end;
+                  a_load_reg_reg(list,fromsize,tosize,tmpreg,register);
+                end
+              else
+                begin
+                  href:=ref;
+                  if target_info.endian=endian_big then
+                    inc(href.offset,tcgsize2size[fromsize]-1);
+                  tmpreg:=getintregister(list,OS_32);
+                  a_load_ref_reg(list,OS_8,OS_32,href,tmpreg);
+                  tmpreg2:=getintregister(list,OS_32);
+                  for i:=1 to tcgsize2size[fromsize]-1 do
+                    begin
+                      if target_info.endian=endian_big then
+                        dec(href.offset)
+                      else
+                        inc(href.offset);
+                      a_load_ref_reg(list,OS_8,OS_32,href,tmpreg2);
+                      list.concat(taicpu.op_reg_reg_const_const(A_BFI,tmpreg,tmpreg2,i*8,8));
+                    end;
+                  a_load_reg_reg(list,fromsize,tosize,tmpreg,register);
+                end;
+            end;
+          else
+            inherited;
+        end;
+      end;
+
+
+    procedure tcgaarch64.a_load_reg_reg(list:TAsmList;fromsize,tosize:tcgsize;reg1,reg2:tregister);
+      var
+        instr: taicpu;
+      begin
+        { we use both 32 and 64 bit registers -> insert conversion when when
+          we have to truncate/sign extend inside the (32 or 64 bit) register
+          holding the value, and when we sign extend from a 32 to a 64 bit
+          register }
+        if (tcgsize2size[fromsize]>tcgsize2size[tosize]) or
+           ((tcgsize2size[fromsize]=tcgsize2size[tosize]) and
+            (fromsize<>tosize) and
+            not(fromsize in [OS_32,OS_S32,OS_64,OS_S64])) or
+           ((fromsize in [OS_S8,OS_S16,OS_S32]) and
+            (tosize in [OS_64,OS_S64])) or
+           { needs to mask out the sign in the top 16 bits }
+           ((fromsize=OS_S8) and
+            (tosize=OS_16)) then
+          begin
+            case tosize of
+              OS_8:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_B));
+              OS_16:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_H));
+              OS_S8:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_B));
+              OS_S16:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_H));
+              { while "mov wN, wM" automatically inserts a zero-extension and
+                hence we could encode a 64->32 bit move like that, the problem
+                is that we then can't distinguish 64->32 from 32->32 moves, and
+                the 64->32 truncation could be removed altogether... So use a
+                different instruction }
+              OS_32,
+              OS_S32:
+                { in theory, reg1 should be 64 bit here (since fromsize>tosize),
+                  but because of the way location_force_register() tries to
+                  avoid superfluous zero/sign extensions, it's not always the
+                  case -> also force reg1 to to 64 bit }
+                list.concat(taicpu.op_reg_reg_const_const(A_UBFIZ,makeregsize(reg2,OS_64),makeregsize(reg1,OS_64),0,32));
+              OS_64,
+              OS_S64:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_W));
+              else
+                internalerror(2002090901);
+            end;
+          end
+        else
+          begin
+            { 32 -> 32 bit move implies zero extension (sign extensions have
+              been handled above) -> also use for 32 <-> 64 bit moves }
+            if not(fromsize in [OS_64,OS_S64]) or
+               not(tosize in [OS_64,OS_S64]) then
+              instr:=taicpu.op_reg_reg(A_MOV,makeregsize(reg2,OS_32),makeregsize(reg1,OS_32))
+            else
+              instr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
+            list.Concat(instr);
+            { Notify the register allocator that we have written a move instruction so
+             it can try to eliminate it. }
+            add_move_instruction(instr);
+          end;
+      end;
+
+
+    procedure tcgaarch64.a_loadaddr_ref_reg(list: TAsmList; const ref: treference; r: tregister);
+      var
+         href: treference;
+         so: tshifterop;
+         op: tasmop;
+      begin
+        op:=A_LDR;
+        href:=ref;
+        { simplify as if we're going to perform a regular 64 bit load, using
+          "r" as the new base register if possible/necessary }
+        make_simple_ref(list,op,OS_ADDR,PF_None,href,r);
+        { load literal? }
+        if assigned(href.symbol) then
+          begin
+            if (href.base<>NR_NO) or
+               (href.index<>NR_NO) or
+               not assigned(href.symboldata) then
+              internalerror(2014110912);
+            list.concat(taicpu.op_reg_sym_ofs(A_ADR,r,href.symbol,href.offset));
+          end
+        else
+          begin
+            if href.index<>NR_NO then
+              begin
+                if href.shiftmode<>SM_None then
+                  begin
+                    { "add" supports a supperset of the shift modes supported by
+                      load/store instructions }
+                    shifterop_reset(so);
+                    so.shiftmode:=href.shiftmode;
+                    so.shiftimm:=href.shiftimm;
+                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,r,href.base,href.index,so));
+                  end
+                else
+                  a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,href.index,href.base,r);
+              end
+            else if href.offset<>0 then
+              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,href.offset,href.base,r)
+            else
+              a_load_reg_reg(list,OS_ADDR,OS_ADDR,href.base,r);
+          end;
+      end;
+
+
+    procedure tcgaarch64.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);
+      begin
+        internalerror(2014122107)
+      end;
+
+
+    procedure tcgaarch64.a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
+      begin
+        internalerror(2014122108)
+      end;
+
+
+    procedure tcgaarch64.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
+      begin
+        internalerror(2014122109)
+      end;
+
+
+    procedure tcgaarch64.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
+      var
+        instr: taicpu;
+      begin
+        if assigned(shuffle) and
+           not shufflescalar(shuffle) then
+          internalerror(2014122104);
+        if fromsize=tosize then
+          begin
+            instr:=taicpu.op_reg_reg(A_FMOV,reg2,reg1);
+            { Notify the register allocator that we have written a move
+              instruction so it can try to eliminate it. }
+            add_move_instruction(instr);
+            { FMOV cannot generate a floating point exception }
+          end
+        else
+          begin
+            if (reg_cgsize(reg1)<>fromsize) or
+               (reg_cgsize(reg2)<>tosize) then
+              internalerror(2014110913);
+            instr:=taicpu.op_reg_reg(A_FCVT,reg2,reg1);
+            maybe_check_for_fpu_exception(list);
+          end;
+        list.Concat(instr);
+      end;
+
+
+    procedure tcgaarch64.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
+       var
+         tmpreg: tregister;
+       begin
+         if assigned(shuffle) and
+            not shufflescalar(shuffle) then
+           internalerror(2014122105);
+         tmpreg:=NR_NO;
+         if (fromsize<>tosize) then
+           begin
+             tmpreg:=reg;
+             reg:=getmmregister(list,fromsize);
+           end;
+         handle_load_store(list,A_LDR,fromsize,PF_None,reg,ref);
+         if (fromsize<>tosize) then
+           a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
+       end;
+
+
+     procedure tcgaarch64.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
+       var
+         tmpreg: tregister;
+       begin
+         if assigned(shuffle) and
+            not shufflescalar(shuffle) then
+           internalerror(2014122106);
+         if (fromsize<>tosize) then
+           begin
+             tmpreg:=getmmregister(list,tosize);
+             a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
+             reg:=tmpreg;
+           end;
+         handle_load_store(list,A_STR,tosize,PF_NONE,reg,ref);
+       end;
+
+
+     procedure tcgaarch64.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
+       begin
+         if not shufflescalar(shuffle) then
+           internalerror(2014122801);
+         if not(tcgsize2size[fromsize] in [4,8]) or
+            (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then
+           internalerror(2014122803);
+         list.concat(taicpu.op_reg_reg(A_INS,mmreg,intreg));
+       end;
+
+
+     procedure tcgaarch64.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
+       var
+         r : tregister;
+       begin
+         if not shufflescalar(shuffle) then
+           internalerror(2014122802);
+         if not(tcgsize2size[fromsize] in [4,8]) or
+            (tcgsize2size[fromsize]>tcgsize2size[tosize]) then
+           internalerror(2014122804);
+         if tcgsize2size[fromsize]<tcgsize2size[tosize] then
+           r:=makeregsize(intreg,fromsize)
+         else
+           r:=intreg;
+         list.concat(taicpu.op_reg_reg(A_UMOV,r,mmreg));
+       end;
+
+
+    procedure tcgaarch64.a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
+      begin
+        case op of
+          { "xor Vx,Vx" is used to initialize global regvars to 0 }
+          OP_XOR:
+            begin
+              if (src<>dst) or
+                 (reg_cgsize(src)<>size) or
+                 assigned(shuffle) then
+                internalerror(2015011401);
+              case size of
+                OS_F32,
+                OS_F64:
+                  list.concat(taicpu.op_reg_const(A_MOVI,makeregsize(dst,OS_F64),0));
+                else
+                  internalerror(2015011402);
+              end;
+            end
+          else
+            internalerror(2015011403);
+        end;
+      end;
+
+
+    procedure tcgaarch64.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
+      var
+        bitsize: longint;
+      begin
+        if srcsize in [OS_64,OS_S64] then
+          begin
+            bitsize:=64;
+          end
+        else
+          begin
+            bitsize:=32;
+          end;
+        { source is 0 -> dst will have to become 255 }
+        list.concat(taicpu.op_reg_const(A_CMP,src,0));
+        if reverse then
+          begin
+            list.Concat(taicpu.op_reg_reg(A_CLZ,makeregsize(dst,srcsize),src));
+            { xor 31/63 is the same as setting the lower 5/6 bits to
+              "31/63-(lower 5/6 bits of dst)" }
+            list.Concat(taicpu.op_reg_reg_const(A_EOR,dst,dst,bitsize-1));
+          end
+        else
+          begin
+            list.Concat(taicpu.op_reg_reg(A_RBIT,makeregsize(dst,srcsize),src));
+            list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
+          end;
+        { set dst to -1 if src was 0 }
+        list.Concat(taicpu.op_reg_reg_reg_cond(A_CSINV,dst,dst,makeregsize(NR_XZR,dstsize),C_NE));
+        { mask the -1 to 255 if src was 0 (anyone find a two-instruction
+          branch-free version? All of mine are 3...) }
+        list.Concat(setoppostfix(taicpu.op_reg_reg(A_UXT,makeregsize(dst,OS_32),makeregsize(dst,OS_32)),PF_B));
+      end;
+
+
+    procedure tcgaarch64.a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference);
+      var
+        href: treference;
+        hreg1, hreg2, tmpreg: tregister;
+      begin
+        if fromsize in [OS_64,OS_S64] then
+          begin
+            { split into two 32 bit stores }
+            hreg1:=getintregister(list,OS_32);
+            hreg2:=getintregister(list,OS_32);
+            a_load_reg_reg(list,OS_32,OS_32,makeregsize(register,OS_32),hreg1);
+            a_op_const_reg_reg(list,OP_SHR,OS_64,32,register,makeregsize(hreg2,OS_64));
+            if target_info.endian=endian_big then
+              begin
+                tmpreg:=hreg1;
+                hreg1:=hreg2;
+                hreg2:=tmpreg;
+              end;
+            { can we use STP? }
+            if (ref.alignment=4) and
+               (simple_ref_type(A_STP,OS_32,PF_None,ref)=sr_simple) then
+              list.concat(taicpu.op_reg_reg_ref(A_STP,hreg1,hreg2,ref))
+            else
+              begin
+                a_load_reg_ref(list,OS_32,OS_32,hreg1,ref);
+                href:=ref;
+                inc(href.offset,4);
+                a_load_reg_ref(list,OS_32,OS_32,hreg2,href);
+              end;
+          end
+       else
+         inherited;
+      end;
+
+
+    procedure tcgaarch64.maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
+      const
+        overflowops = [OP_MUL,OP_IMUL,OP_SHL,OP_ADD,OP_SUB,OP_NOT,OP_NEG];
+      begin
+        if (op in overflowops) and
+           (size in [OS_8,OS_S8,OS_16,OS_S16]) then
+          a_load_reg_reg(list,OS_32,size,makeregsize(dst,OS_32),makeregsize(dst,OS_32))
+      end;
+
+
+    procedure tcgaarch64.a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);
+      begin
+        optimize_op_const(size,op,a);
+        case op of
+          OP_NONE:
+            exit;
+          OP_MOVE:
+            a_load_const_reg(list,size,a,reg);
+          OP_NEG,OP_NOT:
+            internalerror(200306011);
+          else
+            a_op_const_reg_reg(list,op,size,a,reg,reg);
+        end;
+      end;
+
+
+    procedure tcgaarch64.a_op_reg_reg(list:TAsmList;op:topcg;size:tcgsize;src,dst:tregister);
+      begin
+        Case op of
+          OP_NEG,
+          OP_NOT:
+            begin
+              list.concat(taicpu.op_reg_reg(TOpCG2AsmOpReg[op],dst,src));
+              maybeadjustresult(list,op,size,dst);
+            end
+          else
+            a_op_reg_reg_reg(list,op,size,src,dst,dst);
+        end;
+      end;
+
+
+    procedure tcgaarch64.a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);
+      var
+        l: tlocation;
+      begin
+        a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,l);
+      end;
+
+
+    procedure tcgaarch64.a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);
+      var
+        hreg: tregister;
+      begin
+        { no ROLV opcode... }
+        if op=OP_ROL then
+          begin
+            case size of
+              OS_32,OS_S32,
+              OS_64,OS_S64:
+                begin
+                  hreg:=getintregister(list,size);
+                  a_load_const_reg(list,size,tcgsize2size[size]*8,hreg);
+                  a_op_reg_reg(list,OP_SUB,size,src1,hreg);
+                  a_op_reg_reg_reg(list,OP_ROR,size,hreg,src2,dst);
+                  exit;
+                end;
+              else
+                internalerror(2014111005);
+            end;
+          end
+        else if (op=OP_ROR) and
+           not(size in [OS_32,OS_S32,OS_64,OS_S64]) then
+          internalerror(2014111006);
+        if TOpCG2AsmOpReg[op]=A_NONE then
+          internalerror(2014111007);
+        list.concat(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1));
+        maybeadjustresult(list,op,size,dst);
+      end;
+
+
+    procedure tcgaarch64.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);
+      var
+        shiftcountmask: longint;
+        constreg: tregister;
+      begin
+        { add/sub instructions have only positive immediate operands }
+        if (op in [OP_ADD,OP_SUB]) and
+           (a<0) then
+          begin
+            if op=OP_ADD then
+              op:=op_SUB
+            else
+              op:=OP_ADD;
+            { avoid range/overflow error in case a = low(tcgint) }
+{$push}{$r-}{$q-}
+            a:=-a;
+{$pop}
+          end;
+        ovloc.loc:=LOC_VOID;
+        optimize_op_const(size,op,a);
+        case op of
+          OP_NONE:
+            begin
+              a_load_reg_reg(list,size,size,src,dst);
+              exit;
+            end;
+          OP_MOVE:
+            begin
+              a_load_const_reg(list,size,a,dst);
+              exit;
+            end;
+          else
+            ;
+        end;
+        case op of
+          OP_ADD,
+          OP_SUB:
+            begin
+              handle_reg_imm12_reg(list,TOpCG2AsmOpImm[op],size,src,a,dst,NR_NO,setflags,true);
+              { on a 64 bit target, overflows with smaller data types
+                are handled via range errors }
+              if setflags and
+                 (size in [OS_64,OS_S64]) then
+                begin
+                  location_reset(ovloc,LOC_FLAGS,OS_8);
+                  if size=OS_64 then
+                    if op=OP_ADD then
+                      ovloc.resflags:=F_CS
+                    else
+                      ovloc.resflags:=F_CC
+                  else
+                    ovloc.resflags:=F_VS;
+                end;
+            end;
+          OP_OR,
+          OP_AND,
+          OP_XOR:
+            begin
+              if not(size in [OS_64,OS_S64]) then
+                a:=cardinal(a);
+              if is_shifter_const(a,size) then
+                list.concat(taicpu.op_reg_reg_const(TOpCG2AsmOpReg[op],dst,src,a))
+              else
+                begin
+                  constreg:=getintregister(list,size);
+                  a_load_const_reg(list,size,a,constreg);
+                  a_op_reg_reg_reg(list,op,size,constreg,src,dst);
+                end;
+            end;
+          OP_SHL,
+          OP_SHR,
+          OP_SAR:
+            begin
+              if size in [OS_64,OS_S64] then
+                shiftcountmask:=63
+              else
+                shiftcountmask:=31;
+              if (a and shiftcountmask)<>0 Then
+                list.concat(taicpu.op_reg_reg_const(
+                  TOpCG2AsmOpImm[Op],dst,src,a and shiftcountmask))
+              else
+                a_load_reg_reg(list,size,size,src,dst);
+              if (a and not(tcgint(shiftcountmask)))<>0 then
+                internalError(2014112101);
+            end;
+          OP_ROL,
+          OP_ROR:
+            begin
+              case size of
+                OS_32,OS_S32:
+                  if (a and not(tcgint(31)))<>0 then
+                    internalError(2014112102);
+                OS_64,OS_S64:
+                  if (a and not(tcgint(63)))<>0 then
+                    internalError(2014112103);
+                else
+                  internalError(2014112104);
+              end;
+              { there's only a ror opcode }
+              if op=OP_ROL then
+                a:=(tcgsize2size[size]*8)-a;
+              list.concat(taicpu.op_reg_reg_const(A_ROR,dst,src,a));
+            end;
+          OP_MUL,
+          OP_IMUL,
+          OP_DIV,
+          OP_IDIV:
+            begin
+              constreg:=getintregister(list,size);
+              a_load_const_reg(list,size,a,constreg);
+              a_op_reg_reg_reg_checkoverflow(list,op,size,constreg,src,dst,setflags,ovloc);
+            end;
+          else
+            internalerror(2014111403);
+        end;
+        maybeadjustresult(list,op,size,dst);
+      end;
+
+
+    procedure tcgaarch64.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);
+      var
+        tmpreg1, tmpreg2: tregister;
+      begin
+        ovloc.loc:=LOC_VOID;
+        { overflow can only occur with 64 bit calculations on 64 bit cpus }
+        if setflags and
+           (size in [OS_64,OS_S64]) then
+          begin
+            case op of
+              OP_ADD,
+              OP_SUB:
+                begin
+                  list.concat(setoppostfix(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1),PF_S));
+                  ovloc.loc:=LOC_FLAGS;
+                  if size=OS_64 then
+                    if op=OP_ADD then
+                      ovloc.resflags:=F_CS
+                    else
+                      ovloc.resflags:=F_CC
+                  else
+                    ovloc.resflags:=F_VS;
+                  { finished }
+                  exit;
+                end;
+              OP_MUL:
+                begin
+                  { check whether the upper 64 bit of the 128 bit product is 0 }
+                  tmpreg1:=getintregister(list,OS_64);
+                  list.concat(taicpu.op_reg_reg_reg(A_UMULH,tmpreg1,src2,src1));
+                  list.concat(taicpu.op_reg_const(A_CMP,tmpreg1,0));
+                  ovloc.loc:=LOC_FLAGS;
+                  ovloc.resflags:=F_NE;
+                  { still have to perform the actual multiplication  }
+                end;
+              OP_IMUL:
+                begin
+                  { check whether the upper 64 bits of the 128 bit multiplication
+                    result have the same value as the replicated sign bit of the
+                    lower 64 bits }
+                  tmpreg1:=getintregister(list,OS_64);
+                  list.concat(taicpu.op_reg_reg_reg(A_SMULH,tmpreg1,src2,src1));
+                  { calculate lower 64 bits (afterwards, because dst may be
+                    equal to src1 or src2) }
+                  a_op_reg_reg_reg(list,op,size,src1,src2,dst);
+                  { replicate sign bit }
+                  tmpreg2:=getintregister(list,OS_64);
+                  a_op_const_reg_reg(list,OP_SAR,OS_S64,63,dst,tmpreg2);
+                  list.concat(taicpu.op_reg_reg(A_CMP,tmpreg1,tmpreg2));
+                  ovloc.loc:=LOC_FLAGS;
+                  ovloc.resflags:=F_NE;
+                  { finished }
+                  exit;
+                end;
+              OP_IDIV,
+              OP_DIV:
+                begin
+                  { not handled here, needs div-by-zero check (dividing by zero
+                    just gives a 0 result on aarch64), and low(int64) div -1
+                    check for overflow) }
+                  internalerror(2014122101);
+                end;
+              else
+                internalerror(2019050936);
+            end;
+          end;
+        a_op_reg_reg_reg(list,op,size,src1,src2,dst);
+      end;
+
+
+
+  {*************** compare instructructions ****************}
+
+    procedure tcgaarch64.a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);
+      var
+        op: tasmop;
+      begin
+        if a>=0 then
+          op:=A_CMP
+        else
+          op:=A_CMN;
+        { avoid range/overflow error in case a=low(tcgint) }
+{$push}{$r-}{$q-}
+        handle_reg_imm12_reg(list,op,size,reg,abs(a),NR_XZR,NR_NO,false,false);
+{$pop}
+        a_jmp_cond(list,cmp_op,l);
+      end;
+
+
+    procedure tcgaarch64.a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1,reg2: tregister; l: tasmlabel);
+      begin
+        list.concat(taicpu.op_reg_reg(A_CMP,reg2,reg1));
+        a_jmp_cond(list,cmp_op,l);
+      end;
+
+
+    procedure tcgaarch64.a_jmp_always(list: TAsmList; l: TAsmLabel);
+      var
+        ai: taicpu;
+      begin
+        ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(l.name,AT_FUNCTION));
+        ai.is_jmp:=true;
+        list.Concat(ai);
+      end;
+
+
+    procedure tcgaarch64.a_jmp_name(list: TAsmList; const s: string);
+      var
+        ai: taicpu;
+      begin
+        ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(s,AT_FUNCTION));
+        ai.is_jmp:=true;
+        list.Concat(ai);
+      end;
+
+
+    procedure tcgaarch64.a_jmp_cond(list: TAsmList; cond: TOpCmp; l: TAsmLabel);
+      var
+        ai: taicpu;
+      begin
+        ai:=TAiCpu.op_sym(A_B,l);
+        ai.is_jmp:=true;
+        ai.SetCondition(TOpCmp2AsmCond[cond]);
+        list.Concat(ai);
+      end;
+
+
+    procedure tcgaarch64.a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);
+      var
+        ai : taicpu;
+      begin
+        ai:=Taicpu.op_sym(A_B,l);
+        ai.is_jmp:=true;
+        ai.SetCondition(flags_to_cond(f));
+        list.Concat(ai);
+      end;
+
+
+    procedure tcgaarch64.g_flags2reg(list: TAsmList; size: tcgsize; const f: tresflags; reg: tregister);
+      begin
+        list.concat(taicpu.op_reg_cond(A_CSET,reg,flags_to_cond(f)));
+      end;
+
+
+    procedure tcgaarch64.g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);
+      begin
+        { we need an explicit overflow location, because there are many
+          possibilities (not just the overflow flag, which is only used for
+          signed add/sub) }
+        internalerror(2014112303);
+      end;
+
+
+    procedure tcgaarch64.g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc : tlocation);
+      var
+        hl : tasmlabel;
+        hflags : tresflags;
+      begin
+        if not(cs_check_overflow in current_settings.localswitches) then
+          exit;
+        current_asmdata.getjumplabel(hl);
+        case ovloc.loc of
+          LOC_FLAGS:
+            begin
+              hflags:=ovloc.resflags;
+              inverse_flags(hflags);
+              cg.a_jmp_flags(list,hflags,hl);
+            end;
+          else
+            internalerror(2014112304);
+        end;
+        a_call_name(list,'FPC_OVERFLOW',false);
+        a_label(list,hl);
+      end;
+
+  { *********** entry/exit code and address loading ************ }
+
+    function tcgaarch64.save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
+      var
+        ref: treference;
+        sr: tsuperregister;
+        pairreg: tregister;
+      begin
+        result:=0;
+        reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
+        ref.addressmode:=AM_PREINDEXED;
+        pairreg:=NR_NO;
+        { store all used registers pairwise }
+        for sr:=lowsr to highsr do
+          if sr in rg[rt].used_in_proc then
+            if pairreg=NR_NO then
+              pairreg:=newreg(rt,sr,sub)
+            else
+              begin
+                inc(result,16);
+                list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,newreg(rt,sr,sub),ref));
+                pairreg:=NR_NO
+              end;
+        { one left -> store twice (stack must be 16 bytes aligned) }
+        if pairreg<>NR_NO then
+          begin
+            list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,pairreg,ref));
+            inc(result,16);
+          end;
+      end;
+
+
+    procedure FixupOffsets(p:TObject;arg:pointer);
+      var
+        sym: tabstractnormalvarsym absolute p;
+      begin
+        if (tsym(p).typ in [paravarsym,localvarsym]) and
+          (sym.localloc.loc=LOC_REFERENCE) and
+          (sym.localloc.reference.base=NR_STACK_POINTER_REG) then
+          begin
+            sym.localloc.reference.base:=NR_FRAME_POINTER_REG;
+            dec(sym.localloc.reference.offset,PLongint(arg)^);
+          end;
+      end;
+
+
+    procedure tcgaarch64.g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);
+      var
+        ref: treference;
+        totalstackframesize: longint;
+      begin
+        if nostackframe then
+          exit;
+        { stack pointer has to be aligned to 16 bytes at all times }
+        localsize:=align(localsize,16);
+
+        { save stack pointer and return address }
+        reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
+        ref.addressmode:=AM_PREINDEXED;
+        list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
+        { initialise frame pointer }
+        a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
+
+        totalstackframesize:=localsize;
+        { save modified integer registers }
+        inc(totalstackframesize,
+          save_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE));
+        { only the lower 64 bits of the modified vector registers need to be
+          saved; if the caller needs the upper 64 bits, it has to save them
+          itself }
+        inc(totalstackframesize,
+          save_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD));
+
+        { allocate stack space }
+        if localsize<>0 then
+          begin
+            localsize:=align(localsize,16);
+            current_procinfo.final_localsize:=localsize;
+            handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize,NR_SP,NR_IP0,false,true);
+          end;
+        { By default, we use the frame pointer to access parameters passed via
+          the stack and the stack pointer to address local variables and temps
+          because
+           a) we can use bigger positive than negative offsets (so accessing
+              locals via negative offsets from the frame pointer would be less
+              efficient)
+           b) we don't know the local size while generating the code, so
+              accessing the parameters via the stack pointer is not possible
+              without copying them
+          The problem with this is the get_frame() intrinsic:
+           a) it must return the same value as what we pass as parentfp
+              parameter, since that's how it's used in the TP-style objects unit
+           b) its return value must usable to access all local data from a
+              routine (locals and parameters), since it's all the nested
+              routines have access to
+           c) its return value must be usable to construct a backtrace, as it's
+              also used by the exception handling routines
+
+          The solution we use here, based on something similar that's done in
+          the MIPS port, is to generate all accesses to locals in the routine
+          itself SP-relative, and then after the code is generated and the local
+          size is known (namely, here), we change all SP-relative variables/
+          parameters into FP-relative ones. This means that they'll be accessed
+          less efficiently from nested routines, but those accesses are indirect
+          anyway and at least this way they can be accessed at all
+        }
+        if current_procinfo.has_nestedprocs then
+          begin
+            current_procinfo.procdef.localst.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
+            current_procinfo.procdef.parast.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
+          end;
+      end;
+
+
+    procedure tcgaarch64.g_maybe_got_init(list : TAsmList);
+      begin
+        { nothing to do on Darwin or Linux }
+      end;
+
+
+    procedure tcgaarch64.g_restore_registers(list:TAsmList);
+      begin
+        { done in g_proc_exit }
+      end;
+
+
+    procedure tcgaarch64.load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
+      var
+        ref: treference;
+        sr, highestsetsr: tsuperregister;
+        pairreg: tregister;
+        regcount: longint;
+      begin
+        reference_reset_base(ref,NR_SP,16,ctempposinvalid,16,[]);
+        ref.addressmode:=AM_POSTINDEXED;
+        { highest reg stored twice? }
+        regcount:=0;
+        highestsetsr:=RS_NO;
+        for sr:=lowsr to highsr do
+          if sr in rg[rt].used_in_proc then
+            begin
+              inc(regcount);
+              highestsetsr:=sr;
+            end;
+        if odd(regcount) then
+          begin
+            list.concat(taicpu.op_reg_ref(A_LDR,newreg(rt,highestsetsr,sub),ref));
+            highestsetsr:=pred(highestsetsr);
+          end;
+        { load all (other) used registers pairwise }
+        pairreg:=NR_NO;
+        for sr:=highestsetsr downto lowsr do
+          if sr in rg[rt].used_in_proc then
+            if pairreg=NR_NO then
+              pairreg:=newreg(rt,sr,sub)
+            else
+              begin
+                list.concat(taicpu.op_reg_reg_ref(A_LDP,newreg(rt,sr,sub),pairreg,ref));
+                pairreg:=NR_NO
+              end;
+        { There can't be any register left }
+        if pairreg<>NR_NO then
+          internalerror(2014112602);
+      end;
+
+
+
+    procedure tcgaarch64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
+      var
+        ref: treference;
+        regsstored: boolean;
+        sr: tsuperregister;
+      begin
+        if not(nostackframe) and
+          { we do not need an exit stack frame when we never return
+
+            * the final ret is left so the peephole optimizer can easily do call/ret -> jmp or call conversions
+            * the entry stack frame must be normally generated because the subroutine could be still left by
+              an exception and then the unwinding code might need to restore the registers stored by the entry code
+          }
+          not(po_noreturn in current_procinfo.procdef.procoptions) then
+          begin
+            { if no registers have been stored, we don't have to subtract the
+              allocated temp space from the stack pointer }
+            regsstored:=false;
+            for sr:=RS_X19 to RS_X28 do
+              if sr in rg[R_INTREGISTER].used_in_proc then
+                begin
+                  regsstored:=true;
+                  break;
+                end;
+            if not regsstored then
+              for sr:=RS_D8 to RS_D15 do
+                if sr in rg[R_MMREGISTER].used_in_proc then
+                  begin
+                    regsstored:=true;
+                    break;
+                  end;
+            { restore registers (and stack pointer) }
+            if regsstored then
+              begin
+                if current_procinfo.final_localsize<>0 then
+                  handle_reg_imm12_reg(list,A_ADD,OS_ADDR,NR_SP,current_procinfo.final_localsize,NR_SP,NR_IP0,false,true);
+                load_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD);
+                load_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE);
+              end
+            else if current_procinfo.final_localsize<>0 then
+              { restore stack pointer }
+              a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_FP,NR_SP);
+
+            { restore framepointer and return address }
+            reference_reset_base(ref,NR_SP,16,ctempposinvalid,16,[]);
+            ref.addressmode:=AM_POSTINDEXED;
+            list.concat(taicpu.op_reg_reg_ref(A_LDP,NR_FP,NR_LR,ref));
+          end;
+
+        { return }
+        list.concat(taicpu.op_none(A_RET));
+      end;
+
+
+    procedure tcgaarch64.g_save_registers(list : TAsmList);
+      begin
+        { done in g_proc_entry }
+      end;
+
+
+    { ************* concatcopy ************ }
+
+    procedure tcgaarch64.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
+      var
+        paraloc1,paraloc2,paraloc3 : TCGPara;
+        pd : tprocdef;
+      begin
+        pd:=search_system_proc('MOVE');
+        paraloc1.init;
+        paraloc2.init;
+        paraloc3.init;
+        paramanager.getcgtempparaloc(list,pd,1,paraloc1);
+        paramanager.getcgtempparaloc(list,pd,2,paraloc2);
+        paramanager.getcgtempparaloc(list,pd,3,paraloc3);
+        a_load_const_cgpara(list,OS_SINT,len,paraloc3);
+        a_loadaddr_ref_cgpara(list,dest,paraloc2);
+        a_loadaddr_ref_cgpara(list,source,paraloc1);
+        paramanager.freecgpara(list,paraloc3);
+        paramanager.freecgpara(list,paraloc2);
+        paramanager.freecgpara(list,paraloc1);
+        alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+        alloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
+        a_call_name(list,'FPC_MOVE',false);
+        dealloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
+        dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+        paraloc3.done;
+        paraloc2.done;
+        paraloc1.done;
+      end;
+
+
+    procedure tcgaarch64.g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);
+
+      var
+        sourcebasereplaced, destbasereplaced: boolean;
+
+      { get optimal memory operation to use for loading/storing data
+        in an unrolled loop }
+      procedure getmemop(scaledop, unscaledop: tasmop; const startref, endref: treference; opsize: tcgsize; postfix: toppostfix; out memop: tasmop; out needsimplify: boolean);
+        begin
+          if (simple_ref_type(scaledop,opsize,postfix,startref)=sr_simple) and
+             (simple_ref_type(scaledop,opsize,postfix,endref)=sr_simple) then
+            begin
+              memop:=unscaledop;
+              needsimplify:=true;
+            end
+          else if (unscaledop<>A_NONE) and
+             (simple_ref_type(unscaledop,opsize,postfix,startref)=sr_simple) and
+             (simple_ref_type(unscaledop,opsize,postfix,endref)=sr_simple) then
+            begin
+              memop:=unscaledop;
+              needsimplify:=false;
+            end
+          else
+            begin
+              memop:=scaledop;
+              needsimplify:=true;
+            end;
+        end;
+
+      { adjust the offset and/or addressing mode after a load/store so it's
+        correct for the next one of the same size }
+      procedure updaterefafterloadstore(var ref: treference; oplen: longint);
+        begin
+          case ref.addressmode of
+            AM_OFFSET:
+              inc(ref.offset,oplen);
+            AM_POSTINDEXED:
+              { base register updated by instruction, next offset can remain
+                the same }
+              ;
+            AM_PREINDEXED:
+              begin
+                { base register updated by instruction -> next instruction can
+                  use post-indexing with offset = sizeof(operation) }
+                ref.offset:=0;
+                ref.addressmode:=AM_OFFSET;
+              end;
+          end;
+        end;
+
+      { generate a load/store and adjust the reference offset to the next
+        memory location if necessary }
+      procedure genloadstore(list: TAsmList; op: tasmop; reg: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
+        begin
+          list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),postfix));
+          updaterefafterloadstore(ref,tcgsize2size[opsize]);
+        end;
+
+      { generate a dual load/store (ldp/stp) and adjust the reference offset to
+        the next memory location if necessary }
+      procedure gendualloadstore(list: TAsmList; op: tasmop; reg1, reg2: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
+        begin
+          list.concat(setoppostfix(taicpu.op_reg_reg_ref(op,reg1,reg2,ref),postfix));
+          updaterefafterloadstore(ref,tcgsize2size[opsize]*2);
+        end;
+
+      { turn a reference into a pre- or post-indexed reference for use in a
+        load/store of a particular size }
+      procedure makesimpleforcopy(list: TAsmList; var scaledop: tasmop; opsize: tcgsize; postfix: toppostfix; forcepostindexing: boolean; var ref: treference; var basereplaced: boolean);
+        var
+          tmpreg: tregister;
+          scaledoffset: longint;
+          orgaddressmode: taddressmode;
+        begin
+          scaledoffset:=tcgsize2size[opsize];
+          if scaledop in [A_LDP,A_STP] then
+            scaledoffset:=scaledoffset*2;
+          { can we use the reference as post-indexed without changes? }
+          if forcepostindexing then
+            begin
+              orgaddressmode:=ref.addressmode;
+              ref.addressmode:=AM_POSTINDEXED;
+              if (orgaddressmode=AM_POSTINDEXED) or
+                 ((ref.offset=0) and
+                  (simple_ref_type(scaledop,opsize,postfix,ref)=sr_simple)) then
+                begin
+                  { just change the post-indexed offset to the access size }
+                  ref.offset:=scaledoffset;
+                  { and replace the base register if that didn't happen yet
+                    (could be sp or a regvar) }
+                  if not basereplaced then
+                    begin
+                      tmpreg:=getaddressregister(list);
+                      a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg);
+                      ref.base:=tmpreg;
+                      basereplaced:=true;
+                    end;
+                  exit;
+                end;
+              ref.addressmode:=orgaddressmode;
+            end;
+{$ifdef dummy}
+          This could in theory be useful in case you have a concatcopy from
+          e.g. x1+255 to x1+267 *and* the reference is aligned, but this seems
+          very unlikely. Disabled because it still needs fixes, as it
+          also generates pre-indexed loads right now at the very end for the
+          left-over gencopies
+
+          { can we turn it into a pre-indexed reference for free? (after the
+            first operation, it will be turned into an offset one) }
+          if not forcepostindexing and
+             (ref.offset<>0) then
+            begin
+              orgaddressmode:=ref.addressmode;
+              ref.addressmode:=AM_PREINDEXED;
+              tmpreg:=ref.base;
+              if not basereplaced and
+                 (ref.base=tmpreg) then
+                begin
+                  tmpreg:=getaddressregister(list);
+                  a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg);
+                  ref.base:=tmpreg;
+                  basereplaced:=true;
+                end;
+              if simple_ref_type(scaledop,opsize,postfix,ref)<>sr_simple then
+                make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
+              exit;
+            end;
+{$endif dummy}
+          if not forcepostindexing then
+            begin
+              ref.addressmode:=AM_OFFSET;
+              make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
+              { this may still cause problems if the final offset is no longer
+                a simple ref; it's a bit complicated to pass all information
+                through at all places and check that here, so play safe: we
+                currently never generate unrolled copies for more than 64
+                bytes (32 with non-double-register copies) }
+              if ref.index=NR_NO then
+                begin
+                  if ((scaledop in [A_LDP,A_STP]) and
+                      (ref.offset<((64-8)*tcgsize2size[opsize]))) or
+                     ((scaledop in [A_LDUR,A_STUR]) and
+                      (ref.offset<(255-8*tcgsize2size[opsize]))) or
+                     ((scaledop in [A_LDR,A_STR]) and
+                      (ref.offset<((4096-8)*tcgsize2size[opsize]))) then
+                    exit;
+                end;
+            end;
+          tmpreg:=getaddressregister(list);
+          a_loadaddr_ref_reg(list,ref,tmpreg);
+          basereplaced:=true;
+          if forcepostindexing then
+            begin
+              reference_reset_base(ref,tmpreg,scaledoffset,ref.temppos,ref.alignment,ref.volatility);
+              ref.addressmode:=AM_POSTINDEXED;
+            end
+          else
+            begin
+              reference_reset_base(ref,tmpreg,0,ref.temppos,ref.alignment,ref.volatility);
+              ref.addressmode:=AM_OFFSET;
+            end
+        end;
+
+      { prepare a reference for use by gencopy. This is done both after the
+        unrolled and regular copy loop -> get rid of post-indexing mode, make
+        sure ref is valid }
+      procedure preparecopy(list: tasmlist; scaledop, unscaledop: tasmop; var ref: treference; opsize: tcgsize; postfix: toppostfix; out op: tasmop; var basereplaced: boolean);
+        var
+          simplify: boolean;
+        begin
+          if ref.addressmode=AM_POSTINDEXED then
+            ref.offset:=tcgsize2size[opsize];
+          getmemop(scaledop,scaledop,ref,ref,opsize,postfix,op,simplify);
+          if simplify then
+            begin
+              makesimpleforcopy(list,scaledop,opsize,postfix,false,ref,basereplaced);
+              op:=scaledop;
+            end;
+        end;
+
+      { generate a copy from source to dest of size opsize/postfix }
+      procedure gencopy(list: TAsmList; var source, dest: treference; postfix: toppostfix; opsize: tcgsize);
+        var
+          reg: tregister;
+          loadop, storeop: tasmop;
+        begin
+          preparecopy(list,A_LDR,A_LDUR,source,opsize,postfix,loadop,sourcebasereplaced);
+          preparecopy(list,A_STR,A_STUR,dest,opsize,postfix,storeop,destbasereplaced);
+          reg:=getintregister(list,opsize);
+          genloadstore(list,loadop,reg,source,postfix,opsize);
+          genloadstore(list,storeop,reg,dest,postfix,opsize);
+        end;
+
+
+      { copy the leftovers after an unrolled or regular copy loop }
+      procedure gencopyleftovers(list: TAsmList; var source, dest: treference; len: longint);
+        begin
+          { stop post-indexing if we did so in the loop, since in that case all
+            offsets definitely can be represented now }
+          if source.addressmode=AM_POSTINDEXED then
+            begin
+              source.addressmode:=AM_OFFSET;
+              source.offset:=0;
+            end;
+          if dest.addressmode=AM_POSTINDEXED then
+            begin
+              dest.addressmode:=AM_OFFSET;
+              dest.offset:=0;
+            end;
+          { transfer the leftovers }
+          if len>=8 then
+            begin
+              dec(len,8);
+              gencopy(list,source,dest,PF_NONE,OS_64);
+            end;
+          if len>=4 then
+            begin
+              dec(len,4);
+              gencopy(list,source,dest,PF_NONE,OS_32);
+            end;
+          if len>=2 then
+            begin
+              dec(len,2);
+              gencopy(list,source,dest,PF_H,OS_16);
+            end;
+          if len>=1 then
+            begin
+              dec(len);
+              gencopy(list,source,dest,PF_B,OS_8);
+            end;
+        end;
+
+
+      const
+        { load_length + loop dec + cbnz }
+        loopoverhead=12;
+        { loop overhead + load + store }
+        totallooplen=loopoverhead + 8;
+      var
+        totalalign: longint;
+        maxlenunrolled: tcgint;
+        loadop, storeop: tasmop;
+        opsize: tcgsize;
+        postfix: toppostfix;
+        tmpsource, tmpdest: treference;
+        scaledstoreop, unscaledstoreop,
+        scaledloadop, unscaledloadop: tasmop;
+        regs: array[1..8] of tregister;
+        countreg: tregister;
+        i, regcount: longint;
+        hl: tasmlabel;
+        simplifysource, simplifydest: boolean;
+      begin
+        if len=0 then
+          exit;
+        sourcebasereplaced:=false;
+        destbasereplaced:=false;
+        { maximum common alignment }
+        totalalign:=max(1,newalignment(source.alignment,dest.alignment));
+        { use a simple load/store? }
+        if (len in [1,2,4,8]) and
+           ((totalalign>=(len div 2)) or
+            (source.alignment=len) or
+            (dest.alignment=len)) then
+          begin
+            opsize:=int_cgsize(len);
+            a_load_ref_ref(list,opsize,opsize,source,dest);
+            exit;
+          end;
+
+        { alignment > length is not useful, and would break some checks below }
+        while totalalign>len do
+          totalalign:=totalalign div 2;
+
+        { operation sizes to use based on common alignment }
+        case totalalign of
+          1:
+            begin
+              postfix:=PF_B;
+              opsize:=OS_8;
+            end;
+          2:
+            begin
+              postfix:=PF_H;
+              opsize:=OS_16;
+            end;
+          4:
+            begin
+              postfix:=PF_None;
+              opsize:=OS_32;
+            end
+          else
+            begin
+              totalalign:=8;
+              postfix:=PF_None;
+              opsize:=OS_64;
+            end;
+        end;
+        { maximum length to handled with an unrolled loop (4 loads + 4 stores) }
+        maxlenunrolled:=min(totalalign,8)*4;
+        { ldp/stp -> 2 registers per instruction }
+        if (totalalign>=4) and
+           (len>=totalalign*2) then
+          begin
+            maxlenunrolled:=maxlenunrolled*2;
+            scaledstoreop:=A_STP;
+            scaledloadop:=A_LDP;
+            unscaledstoreop:=A_NONE;
+            unscaledloadop:=A_NONE;
+          end
+        else
+          begin
+            scaledstoreop:=A_STR;
+            scaledloadop:=A_LDR;
+            unscaledstoreop:=A_STUR;
+            unscaledloadop:=A_LDUR;
+          end;
+        { we only need 4 instructions extra to call FPC_MOVE }
+        if cs_opt_size in current_settings.optimizerswitches then
+          maxlenunrolled:=maxlenunrolled div 2;
+        if (len>maxlenunrolled) and
+           (len>totalalign*8) then
+          begin
+            g_concatcopy_move(list,source,dest,len);
+            exit;
+          end;
+
+        simplifysource:=true;
+        simplifydest:=true;
+        tmpsource:=source;
+        tmpdest:=dest;
+        { can we directly encode all offsets in an unrolled loop? }
+        if len<=maxlenunrolled then
+          begin
+{$ifdef extdebug}
+            list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop; len/opsize/align: '+tostr(len)+'/'+tostr(tcgsize2size[opsize])+'/'+tostr(totalalign))));
+{$endif extdebug}
+            { the leftovers will be handled separately -> -(len mod opsize) }
+            inc(tmpsource.offset,len-(len mod tcgsize2size[opsize]));
+            { additionally, the last regular load/store will be at
+              offset+len-opsize (if len-(len mod opsize)>len) }
+            if tmpsource.offset>source.offset then
+              dec(tmpsource.offset,tcgsize2size[opsize]);
+            getmemop(scaledloadop,unscaledloadop,source,tmpsource,opsize,postfix,loadop,simplifysource);
+            inc(tmpdest.offset,len-(len mod tcgsize2size[opsize]));
+            if tmpdest.offset>dest.offset then
+              dec(tmpdest.offset,tcgsize2size[opsize]);
+            getmemop(scaledstoreop,unscaledstoreop,dest,tmpdest,opsize,postfix,storeop,simplifydest);
+            tmpsource:=source;
+            tmpdest:=dest;
+            { if we can't directly encode all offsets, simplify }
+            if simplifysource then
+              begin
+                loadop:=scaledloadop;
+                makesimpleforcopy(list,loadop,opsize,postfix,false,tmpsource,sourcebasereplaced);
+              end;
+            if simplifydest then
+              begin
+                storeop:=scaledstoreop;
+                makesimpleforcopy(list,storeop,opsize,postfix,false,tmpdest,destbasereplaced);
+              end;
+            regcount:=len div tcgsize2size[opsize];
+            { in case we transfer two registers at a time, we copy an even
+              number of registers }
+            if loadop=A_LDP then
+              regcount:=regcount and not(1);
+            { initialise for dfa }
+            regs[low(regs)]:=NR_NO;
+            { max 4 loads/stores -> max 8 registers (in case of ldp/stdp) }
+            for i:=1 to regcount do
+              regs[i]:=getintregister(list,opsize);
+            if loadop=A_LDP then
+              begin
+                { load registers }
+                for i:=1 to (regcount div 2) do
+                  gendualloadstore(list,loadop,regs[i*2-1],regs[i*2],tmpsource,postfix,opsize);
+                { store registers }
+                for i:=1 to (regcount div 2) do
+                  gendualloadstore(list,storeop,regs[i*2-1],regs[i*2],tmpdest,postfix,opsize);
+              end
+            else
+              begin
+                for i:=1 to regcount do
+                  genloadstore(list,loadop,regs[i],tmpsource,postfix,opsize);
+                for i:=1 to regcount do
+                  genloadstore(list,storeop,regs[i],tmpdest,postfix,opsize);
+              end;
+            { leftover }
+            len:=len-regcount*tcgsize2size[opsize];
+{$ifdef extdebug}
+            list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop leftover: '+tostr(len))));
+{$endif extdebug}
+          end
+        else
+          begin
+{$ifdef extdebug}
+            list.concat(tai_comment.Create(strpnew('concatcopy regular loop; len/align: '+tostr(len)+'/'+tostr(totalalign))));
+{$endif extdebug}
+            { regular loop -> definitely use post-indexing }
+            loadop:=scaledloadop;
+            makesimpleforcopy(list,loadop,opsize,postfix,true,tmpsource,sourcebasereplaced);
+            storeop:=scaledstoreop;
+            makesimpleforcopy(list,storeop,opsize,postfix,true,tmpdest,destbasereplaced);
+            current_asmdata.getjumplabel(hl);
+            countreg:=getintregister(list,OS_32);
+            if loadop=A_LDP then
+              a_load_const_reg(list,OS_32,len div tcgsize2size[opsize]*2,countreg)
+            else
+              a_load_const_reg(list,OS_32,len div tcgsize2size[opsize],countreg);
+            a_label(list,hl);
+            a_op_const_reg(list,OP_SUB,OS_32,1,countreg);
+            if loadop=A_LDP then
+              begin
+                regs[1]:=getintregister(list,opsize);
+                regs[2]:=getintregister(list,opsize);
+                gendualloadstore(list,loadop,regs[1],regs[2],tmpsource,postfix,opsize);
+                gendualloadstore(list,storeop,regs[1],regs[2],tmpdest,postfix,opsize);
+              end
+            else
+              begin
+                regs[1]:=getintregister(list,opsize);
+                genloadstore(list,loadop,regs[1],tmpsource,postfix,opsize);
+                genloadstore(list,storeop,regs[1],tmpdest,postfix,opsize);
+              end;
+            list.concat(taicpu.op_reg_sym_ofs(A_CBNZ,countreg,hl,0));
+            len:=len mod tcgsize2size[opsize];
+          end;
+        gencopyleftovers(list,tmpsource,tmpdest,len);
+      end;
+
+
+    procedure tcgaarch64.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
+      begin
+        { This method is integrated into g_intf_wrapper and shouldn't be called separately }
+        InternalError(2013020102);
+      end;
+
+
+    procedure tcgaarch64.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
+      var
+        r : TRegister;
+        ai: taicpu;
+        l1,l2: TAsmLabel;
+      begin
+        { so far, we assume all flavours of AArch64 need explicit floating point exception checking }
+        if ((cs_check_fpu_exceptions in current_settings.localswitches) and
+            (force or current_procinfo.FPUExceptionCheckNeeded)) then
+          begin
+            r:=getintregister(list,OS_INT);
+            list.concat(taicpu.op_reg_reg(A_MRS,r,NR_FPSR));
+            list.concat(taicpu.op_reg_const(A_TST,r,$1f));
+            current_asmdata.getjumplabel(l1);
+            current_asmdata.getjumplabel(l2);
+            ai:=taicpu.op_sym(A_B,l1);
+            ai.is_jmp:=true;
+            ai.condition:=C_NE;
+            list.concat(ai);
+            list.concat(taicpu.op_reg_const(A_TST,r,$80));
+            ai:=taicpu.op_sym(A_B,l2);
+            ai.is_jmp:=true;
+            ai.condition:=C_EQ;
+            list.concat(ai);
+            a_label(list,l1);
+            alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+            cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
+            dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+            a_label(list,l2);
+            if clear then
+              current_procinfo.FPUExceptionCheckNeeded:=false;
+          end;
+      end;
+
+
+    procedure tcgaarch64.g_profilecode(list : TAsmList);
+      begin
+        if target_info.system = system_aarch64_linux then
+          begin
+            list.concat(taicpu.op_reg_reg(A_MOV,NR_X0,NR_X30));
+            a_call_name(list,'_mcount',false);
+          end
+        else
+          internalerror(2020021901);
+      end;
+{$endif dummy}
+
+    procedure create_codegen;
+      begin
+//!!!        cg:=tcgcpu.Create;
+//!!!        cg64:=tcg64.Create;
+      end;
+
+end.

+ 448 - 0
compiler/xtensa/cpubase.pas

@@ -0,0 +1,448 @@
+{
+    Copyright (c) 1998-2002 by Florian Klaempfl and Peter Vreman
+
+    Contains the base types for the ARM
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+{# Base unit for processor information. This unit contains
+   enumerations of registers, opcodes, sizes, and other
+   such things which are processor specific.
+}
+unit cpubase;
+
+{$define USEINLINE}
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+      globtype,globals,
+      cpuinfo,
+      cgbase
+      ;
+
+
+{*****************************************************************************
+                                Assembler Opcodes
+*****************************************************************************}
+
+    type
+      TAsmOp= {$i xtensaop.inc}
+
+      { This should define the array of instructions as string }
+      op2strtable=array[tasmop] of string[11];
+
+    const
+      { First value of opcode enumeration }
+      firstop = low(tasmop);
+      { Last value of opcode enumeration  }
+      lastop  = high(tasmop);
+
+{*****************************************************************************
+                                  Registers
+*****************************************************************************}
+
+    type
+      { Number of registers used for indexing in tables }
+      tregisterindex=0..{$i rxtensanor.inc}-1;
+
+    const
+      { Available Superregisters }
+      {$i rxtensasup.inc}
+
+      { No Subregisters }
+      R_SUBWHOLE = R_SUBNONE;
+
+      { Available Registers }
+      {$i rxtensacon.inc}
+
+      { Integer Super registers first and last }
+      first_int_supreg = RS_A0;
+      first_int_imreg = $10;
+
+      { Float Super register first and last }
+      first_fpu_supreg    = RS_F0;
+      first_fpu_imreg     = $08;
+
+      { MM Super register first and last }
+      first_mm_supreg    = RS_INVALID;
+      first_mm_imreg     = $30;
+
+      { TODO: Calculate bsstart}
+      regnumber_count_bsstart = 16;
+
+      regnumber_table : array[tregisterindex] of tregister = (
+        {$i rxtensanum.inc}
+      );
+
+      regstabs_table : array[tregisterindex] of shortint = (
+        {$i rxtensasta.inc}
+      );
+
+      regdwarf_table : array[tregisterindex] of shortint = (
+        {$i rxtensadwa.inc}
+      );
+
+{*****************************************************************************
+                                Conditions
+*****************************************************************************}
+
+    type
+      TAsmCond=(C_None,
+        C_EQ,C_NE,C_CS,C_CC,C_MI,C_PL,C_VS,C_VC,C_HI,C_LS,
+        C_GE,C_LT,C_GT,C_LE,C_AL,C_NV
+      );
+
+      TAsmConds = set of TAsmCond;
+
+    const
+      cond2str : array[TAsmCond] of string[2]=('',
+        'eq','ne','cs','cc','mi','pl','vs','vc','hi','ls',
+        'ge','lt','gt','le','al','nv'
+      );
+
+      uppercond2str : array[TAsmCond] of string[2]=('',
+        'EQ','NE','CS','CC','MI','PL','VS','VC','HI','LS',
+        'GE','LT','GT','LE','AL','NV'
+      );
+
+{*****************************************************************************
+                                   Flags
+*****************************************************************************}
+
+    type
+      TResFlags = (F_EQ,F_NE,F_CS,F_CC,F_MI,F_PL,F_VS,F_VC,F_HI,F_LS,
+        F_GE,F_LT,F_GT,F_LE);
+
+{*****************************************************************************
+                                Operands
+*****************************************************************************}
+
+      taddressmode = (AM_OFFSET,AM_PREINDEXED,AM_POSTINDEXED);
+      tshiftmode = (SM_None,SM_LSL,SM_LSR,SM_ASR,SM_ROR,SM_RRX);
+
+      tupdatereg = (UR_None,UR_Update);
+
+      pshifterop = ^tshifterop;
+
+      tshifterop = record
+        shiftmode : tshiftmode;
+        rs : tregister;
+        shiftimm : byte;
+      end;
+
+      tcpumodeflag = (mfA, mfI, mfF);
+      tcpumodeflags = set of tcpumodeflag;
+
+      tspecialregflag = (srC, srX, srS, srF);
+      tspecialregflags = set of tspecialregflag;
+
+{*****************************************************************************
+                                 Constants
+*****************************************************************************}
+
+    const
+      max_operands = 6;
+
+      maxintregs = 15;
+      maxfpuregs = 8;
+      maxaddrregs = 0;
+
+{*****************************************************************************
+                                Operand Sizes
+*****************************************************************************}
+
+    type
+      topsize = (S_NO,
+        S_B,S_W,S_L,S_BW,S_BL,S_WL,
+        S_IS,S_IL,S_IQ,
+        S_FS,S_FL,S_FX,S_D,S_Q,S_FV,S_FXX
+      );
+
+
+{*****************************************************************************
+                          Default generic sizes
+*****************************************************************************}
+    const
+      { Defines the default address size for a processor, }
+      OS_ADDR = OS_32;
+      { the natural int size for a processor,
+        has to match osuinttype/ossinttype as initialized in psystem }
+      OS_INT = OS_32;
+      OS_SINT = OS_S32;
+      { the maximum float size for a processor,           }
+      OS_FLOAT = OS_F64;
+      { the size of a vector register for a processor     }
+      OS_VECTOR = OS_M32;
+
+{*****************************************************************************
+                          Generic Register names
+*****************************************************************************}
+
+      { Stack pointer register }
+      NR_STACK_POINTER_REG = NR_A1;
+      RS_STACK_POINTER_REG = RS_A1;
+      { Frame pointer register (initialized in tcpuprocinfo.init_framepointer) }
+      RS_FRAME_POINTER_REG: tsuperregister = RS_NO;
+      NR_FRAME_POINTER_REG: tregister = NR_NO;
+      { Register for addressing absolute data in a position independant way,
+        such as in PIC code. The exact meaning is ABI specific. For
+        further information look at GCC source : PIC_OFFSET_TABLE_REGNUM
+      }
+       { Results are returned in this register (32-bit values) }
+      NR_FUNCTION_RETURN_REG = NR_A2;
+      RS_FUNCTION_RETURN_REG = RS_A2;
+      { The value returned from a function is available in this register }
+      NR_FUNCTION_RESULT_REG = NR_FUNCTION_RETURN_REG;
+      RS_FUNCTION_RESULT_REG = RS_FUNCTION_RETURN_REG;
+
+      NR_FPU_RESULT_REG = NR_INVALID;
+
+      NR_MM_RESULT_REG  = NR_INVALID;
+
+      NR_RETURN_ADDRESS_REG = NR_FUNCTION_RETURN_REG;
+
+      { Offset where the parent framepointer is pushed }
+      PARENT_FRAMEPOINTER_OFFSET = 0;
+
+      NR_DEFAULTFLAGS = NR_INVALID;
+      RS_DEFAULTFLAGS = RS_INVALID;
+
+{*****************************************************************************
+                       GCC /ABI linking information
+*****************************************************************************}
+
+    const
+      { Required parameter alignment when calling a routine declared as
+        stdcall and cdecl. The alignment value should be the one defined
+        by GCC or the target ABI.
+
+        The value of this constant is equal to the constant
+        PARM_BOUNDARY / BITS_PER_UNIT in the GCC source.
+      }
+      std_param_align = 4;
+
+
+{*****************************************************************************
+                                  Helpers
+*****************************************************************************}
+
+    { Returns the tcgsize corresponding with the size of reg.}
+    function reg_cgsize(const reg: tregister) : tcgsize;
+    function cgsize2subreg(regtype: tregistertype; s:Tcgsize):Tsubregister;
+    function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
+    procedure inverse_flags(var f: TResFlags);
+    function flags_to_cond(const f: TResFlags) : TAsmCond;
+    function findreg_by_number(r:Tregister):tregisterindex;
+    function std_regnum_search(const s:string):Tregister;
+    function std_regname(r:Tregister):string;
+
+    function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
+    function conditions_equal(const c1, c2: TAsmCond): boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
+
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal" }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+
+    function dwarf_reg(r:tregister):shortint;
+    function dwarf_reg_no_error(r:tregister):shortint;
+    function eh_return_data_regno(nr: longint): longint;
+
+  implementation
+
+    uses
+      systems,rgBase,verbose;
+
+
+    const
+      std_regname_table : TRegNameTable = (
+        {$i rxtensastd.inc}
+      );
+
+      regnumber_index : array[tregisterindex] of tregisterindex = (
+        {$i rxtensarni.inc}
+      );
+
+      std_regname_index : array[tregisterindex] of tregisterindex = (
+        {$i rxtensasri.inc}
+      );
+
+
+    function cgsize2subreg(regtype: tregistertype; s:Tcgsize):Tsubregister;
+      begin
+        case regtype of
+          R_MMREGISTER:
+            begin
+              case s of
+                { records passed in MM registers }
+                OS_32,
+                OS_F32:
+                  cgsize2subreg:=R_SUBFS;
+                OS_64,
+                OS_F64:
+                  cgsize2subreg:=R_SUBFD;
+                else
+                  internalerror(2009112701);
+              end;
+            end;
+          else
+            cgsize2subreg:=R_SUBWHOLE;
+        end;
+      end;
+
+
+    function reg_cgsize(const reg: tregister): tcgsize;
+      begin
+        case getregtype(reg) of
+          R_INTREGISTER :
+            reg_cgsize:=OS_32;
+          R_FPUREGISTER :
+            reg_cgsize:=OS_F80;
+          R_MMREGISTER :
+            begin
+              case getsubreg(reg) of
+                R_SUBFD,
+                R_SUBWHOLE:
+                  result:=OS_F64;
+                R_SUBFS:
+                  result:=OS_F32;
+                else
+                  internalerror(2009112903);
+              end;
+            end;
+          else
+            internalerror(200303181);
+          end;
+        end;
+
+
+    function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
+      begin
+        { This isn't 100% perfect because the arm allows jumps also by writing to PC=R15.
+          To overcome this problem we simply forbid that FPC generates jumps by loading R15 }
+        is_calljmp:= o in [A_Bcc,A_BT,A_CALL0,A_CALL4,A_CALL8,A_CALL12,A_CALLX0,A_CALLX4,A_CALLX8,A_CALLX12];
+      end;
+
+
+    procedure inverse_flags(var f: TResFlags);
+      const
+        inv_flags: array[TResFlags] of TResFlags =
+          (F_NE,F_EQ,F_CC,F_CS,F_PL,F_MI,F_VC,F_VS,F_LS,F_HI,
+          F_LT,F_GE,F_LE,F_GT);
+      begin
+        f:=inv_flags[f];
+      end;
+
+
+    function flags_to_cond(const f: TResFlags) : TAsmCond;
+      const
+        flag_2_cond: array[F_EQ..F_LE] of TAsmCond =
+          (C_EQ,C_NE,C_CS,C_CC,C_MI,C_PL,C_VS,C_VC,C_HI,C_LS,
+           C_GE,C_LT,C_GT,C_LE);
+      begin
+        if f>high(flag_2_cond) then
+          internalerror(200112301);
+        result:=flag_2_cond[f];
+      end;
+
+
+    function findreg_by_number(r:Tregister):tregisterindex;
+      begin
+        result:=rgBase.findreg_by_number_table(r,regnumber_index);
+      end;
+
+
+    function std_regnum_search(const s:string):Tregister;
+      begin
+        result:=regnumber_table[findreg_by_name_table(s,std_regname_table,std_regname_index)];
+      end;
+
+
+    function std_regname(r:Tregister):string;
+      var
+        p : tregisterindex;
+      begin
+        p:=findreg_by_number_table(r,regnumber_index);
+        if p<>0 then
+          result:=std_regname_table[p]
+        else
+          result:=generic_regname(r);
+      end;
+
+
+    function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
+      const
+        inverse: array[TAsmCond] of TAsmCond=(C_None,
+          C_NE,C_EQ,C_CC,C_CS,C_PL,C_MI,C_VC,C_VS,C_LS,C_HI,
+          C_LT,C_GE,C_LE,C_GT,C_None,C_None
+        );
+      begin
+        result := inverse[c];
+      end;
+
+
+    function conditions_equal(const c1, c2: TAsmCond): boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
+      begin
+        result := c1 = c2;
+      end;
+
+
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal" }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+      begin
+        Result := (c = C_None) or conditions_equal(Subset, c);
+
+        { Please update as necessary. [Kit] }
+        if not Result then
+          case Subset of
+            C_EQ:
+              Result := (c in [C_GE, C_LE]);
+            C_LT:
+              Result := (c in [C_LE]);
+            C_GT:
+              Result := (c in [C_GE]);
+            else
+              Result := False;
+          end;
+      end;
+
+
+    function dwarf_reg(r:tregister):shortint;
+      begin
+        result:=regdwarf_table[findreg_by_number(r)];
+        if result=-1 then
+          internalerror(200603251);
+      end;
+
+
+    function dwarf_reg_no_error(r:tregister):shortint;
+      begin
+        result:=regdwarf_table[findreg_by_number(r)];
+      end;
+
+
+    function eh_return_data_regno(nr: longint): longint;
+      begin
+        if (nr>=0) and (nr<2) then
+          result:=nr
+        else
+          result:=-1;
+      end;
+
+end.
+

+ 147 - 0
compiler/xtensa/cpuinfo.pas

@@ -0,0 +1,147 @@
+{
+    Copyright (c) 1998-2002 by the Free Pascal development team
+
+    Basic Processor information for the Xtensa
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+
+Unit CPUInfo;
+
+{$i fpcdefs.inc}
+
+Interface
+
+  uses
+    globtype;
+
+Type
+   bestreal = double;
+   bestrealrec = TDoubleRec;
+   ts32real = single;
+   ts64real = double;
+   ts80real = type extended;
+   ts128real = type extended;
+   ts64comp = comp;
+
+   pbestreal=^bestreal;
+
+   { possible supported processors for this target }
+   tcputype =
+      (cpu_none,
+       cpu_lx6
+      );
+
+
+Type
+   tfputype =
+     (fpu_none,
+      fpu_soft,
+      fpu_libgcc
+     );
+
+Type
+   tcontrollertype =
+     (ct_none);
+
+   tcontrollerdatatype = record
+      controllertypestr, controllerunitstr: string[20];
+      cputype: tcputype; fputype: tfputype;
+      flashbase, flashsize, srambase, sramsize, eeprombase, eepromsize, bootbase, bootsize: dword;
+   end;
+
+Const
+   { Is there support for dealing with multiple microcontrollers available }
+   { for this platform? }
+   ControllerSupport = true;
+   {# Size of native extended floating point type }
+   extended_size = 12;
+   { target cpu string (used by compiler options) }
+   target_cpu_string = 'xtensa';
+
+   { calling conventions supported by the code generator }
+   supported_calling_conventions : tproccalloptions = [
+     pocall_internproc,
+     pocall_safecall,
+     pocall_stdcall,
+     { same as stdcall only different name mangling }
+     pocall_cdecl,
+     { same as stdcall only different name mangling }
+     pocall_cppdecl,
+     { same as stdcall but floating point numbers are handled like equal sized integers }
+     pocall_softfloat,
+     { used for interrupt handling }
+     pocall_interrupt
+   ];
+
+   cputypestr : array[tcputype] of string[8] = (
+     '',
+     'LX6'
+   );
+
+   fputypestr : array[tfputype] of string[10] = (
+     'NONE',
+     'SOFT',
+     'LIBGCC'
+   );
+
+
+    { We know that there are fields after sramsize
+      but we don't care about this warning }
+    {$WARN 3177 OFF}
+
+   embedded_controllers : array [tcontrollertype] of tcontrollerdatatype =
+   (
+      (controllertypestr:'';		controllerunitstr:'';	cputype:cpu_none; fputype:fpu_none; flashbase:0)
+   );
+
+   { Supported optimizations, only used for information }
+   supported_optimizerswitches = genericlevel1optimizerswitches+
+                                 genericlevel2optimizerswitches+
+                                 genericlevel3optimizerswitches-
+                                 { no need to write info about those }
+                                 [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
+                                 [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_loopunroll,cs_opt_tailrecursion,
+                                  cs_opt_stackframe,cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath,cs_opt_forcenostackframe];
+
+   level1optimizerswitches = genericlevel1optimizerswitches;
+   level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
+     [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
+   level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [cs_opt_scheduler{,cs_opt_loopunroll}];
+   level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
+
+ type
+   tcpuflags =
+      (
+        CPUXTENSA_DUMMY
+      );
+
+   tfpuflags =
+      (
+        FPUXTENSA_DUMMY
+      );
+
+ const
+   cpu_capabilities : array[tcputype] of set of tcpuflags =
+     (
+       { cpu_none     } [],
+       { cpu_lx6      } []
+     );
+
+   fpu_capabilities : array[tfputype] of set of tfpuflags =
+     (
+       { fpu_none       } [],
+       { fpu_soft       } [],
+       { fpu_libgcc     } []
+     );
+
+Implementation
+
+end.
+

+ 44 - 0
compiler/xtensa/cpunode.pas

@@ -0,0 +1,44 @@
+{******************************************************************************
+    Copyright (c) 2014 by Florian Klaempfl
+
+    Includes the Xtensa code generator
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ *****************************************************************************}
+unit cpunode;
+
+{$I fpcdefs.inc}
+
+interface
+{ This unit is used to define the specific CPU implementations. All needed
+actions are included in the INITALIZATION part of these units. This explains
+the behaviour of such a unit having just a USES clause! }
+
+implementation
+
+  uses
+    ncgbas,ncgflw,ncgcal,ncgcnv,ncgld,ncgmem,ncgcon,ncgset,ncgobjc,
+    { symtable }
+    symcpu,
+    aasmdef
+{$ifndef llvm}
+    //,ncpuadd,ncpumat,ncpumem,ncpuinl,ncpucnv,ncpuset,ncpucon
+{$else llvm}
+    llvmnode
+{$endif llvm}
+    ;
+
+end.

+ 528 - 0
compiler/xtensa/cpupara.pas

@@ -0,0 +1,528 @@
+{
+    Copyright (c) 2002 by Florian Klaempfl
+
+    Xtensa specific calling conventions
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ ****************************************************************************
+}
+unit cpupara;
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+       globtype,
+       aasmtai,aasmdata,
+       cpubase,
+       symconst,symtype,symdef,symsym,
+       paramgr,parabase,cgbase,cgutils;
+
+    type
+       tcpuparamanager = class(tparamanager)
+          function get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;override;
+          function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
+          function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
+
+          procedure getcgtempparaloc(list: TAsmList; pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);override;
+          function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
+          function create_varargs_paraloc_info(p : tabstractprocdef; side: tcallercallee; varargspara:tvarargsparalist):longint;override;
+          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
+         private
+          procedure init_values(var curintreg: tsuperregister; var cur_stack_offset: aword);
+          function create_paraloc_info_intern(p : tabstractprocdef; side : tcallercallee;
+           paras : tparalist; var curintreg : tsuperregister;
+ var cur_stack_offset : aword; varargsparas : boolean) : longint;
+       end;
+
+  implementation
+
+    uses
+       cpuinfo,globals,
+       verbose,systems,
+       defutil,symtable,
+       procinfo,cpupi;
+
+
+    function tcpuparamanager.get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;
+      begin
+        result:=[RS_A0..RS_A15];
+      end;
+
+
+    function tcpuparamanager.get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;
+      begin
+        result:=[RS_F0..RS_F15];
+      end;
+
+
+    procedure tcpuparamanager.getcgtempparaloc(list: TAsmList; pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
+      var
+        paraloc : pcgparalocation;
+        psym : tparavarsym;
+        pdef : tdef;
+      begin
+        psym:=tparavarsym(pd.paras[nr-1]);
+        pdef:=psym.vardef;
+        if push_addr_param(psym.varspez,pdef,pd.proccalloption) then
+          pdef:=cpointerdef.getreusable_no_free(pdef);
+        cgpara.reset;
+        cgpara.size:=def_cgsize(pdef);
+        cgpara.intsize:=tcgsize2size[cgpara.size];
+        cgpara.alignment:=get_para_align(pd.proccalloption);
+        cgpara.def:=pdef;
+        paraloc:=cgpara.add_location;
+        with paraloc^ do
+         begin
+           size:=def_cgsize(pdef);
+           def:=pdef;
+           if (nr<=8) then
+             begin
+               if nr=0 then
+                 internalerror(200309271);
+               loc:=LOC_REGISTER;
+               register:=newreg(R_INTREGISTER,RS_A2+nr,R_SUBWHOLE);
+             end
+           else
+             begin
+               loc:=LOC_REFERENCE;
+               paraloc^.reference.index:=NR_STACK_POINTER_REG;
+               reference.offset:=sizeof(pint)*(nr);
+             end;
+          end;
+      end;
+
+
+
+    function getparaloc(p : tdef) : tcgloc;
+
+      begin
+         { Later, the LOC_REFERENCE is in most cases changed into LOC_REGISTER
+           if push_addr_param for the def is true
+         }
+         case p.typ of
+            orddef:
+              result:=LOC_REGISTER;
+            floatdef:
+              if (cs_fp_emulation in current_settings.moduleswitches) or
+                 (current_settings.fputype in [fpu_soft]) then
+                result := LOC_REGISTER
+              else
+                result := LOC_FPUREGISTER;
+            enumdef:
+              result:=LOC_REGISTER;
+            pointerdef:
+              result:=LOC_REGISTER;
+            formaldef:
+              result:=LOC_REGISTER;
+            classrefdef:
+              result:=LOC_REGISTER;
+            procvardef:
+              if (p.size = sizeof(pint)) then
+                result:=LOC_REGISTER
+              else
+                result:=LOC_REFERENCE;
+            recorddef:
+              if (p.size > 4) then
+                result:=LOC_REFERENCE
+              else
+                result:=LOC_REGISTER;
+            objectdef:
+              if is_object(p) then
+                result:=LOC_REFERENCE
+              else
+                result:=LOC_REGISTER;
+            stringdef:
+              if is_shortstring(p) or is_longstring(p) then
+                result:=LOC_REFERENCE
+              else
+                result:=LOC_REGISTER;
+            filedef:
+              result:=LOC_REGISTER;
+            arraydef:
+              if is_dynamic_array(p) then
+                getparaloc:=LOC_REGISTER
+              else
+                result:=LOC_REFERENCE;
+            setdef:
+              if is_smallset(p) then
+                result:=LOC_REGISTER
+              else
+                result:=LOC_REFERENCE;
+            variantdef:
+              result:=LOC_REFERENCE;
+            { avoid problems with errornous definitions }
+            errordef:
+              result:=LOC_REGISTER;
+            else
+              internalerror(2002071001);
+         end;
+      end;
+
+
+    function tcpuparamanager.push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;
+      begin
+        result:=false;
+        { var,out,constref always require address }
+        if varspez in [vs_var,vs_out,vs_constref] then
+          begin
+            result:=true;
+            exit;
+          end;
+        case def.typ of
+          variantdef,
+          formaldef :
+            result:=true;
+          { regular procvars must be passed by value, because you cannot pass
+            the address of a local stack location when calling e.g.
+            pthread_create with the address of a function (first of all it
+            expects the address of the function to execute and not the address
+            of a memory location containing that address, and secondly if you
+            first store the address on the stack and then pass the address of
+            this stack location, then this stack location may no longer be
+            valid when the newly started thread accesses it.
+
+            However, for "procedure of object" we must use the same calling
+            convention as for "8 byte record" due to the need for
+            interchangeability with the TMethod record type.
+          }
+          procvardef :
+            result:=
+              (def.size <> sizeof(pint));
+          recorddef :
+            result := (def.size > 8) or (varspez = vs_const);
+          arraydef:
+            result:=(tarraydef(def).highrange>=tarraydef(def).lowrange) or
+                             is_open_array(def) or
+                             is_array_of_const(def) or
+                             is_array_constructor(def);
+          objectdef :
+            result:=is_object(def);
+          setdef :
+            result:=not is_smallset(def);
+          stringdef :
+            result:=tstringdef(def).stringtype in [st_shortstring,st_longstring];
+          else
+            ;
+        end;
+      end;
+
+
+    procedure tcpuparamanager.init_values(var curintreg: tsuperregister; var cur_stack_offset: aword);
+      begin
+        cur_stack_offset:=0;
+        curintreg:=RS_A2;
+      end;
+
+
+    function tcpuparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;
+      var
+        paraloc : pcgparalocation;
+        retcgsize  : tcgsize;
+      begin
+        if set_common_funcretloc_info(p,forcetempdef,retcgsize,result) then
+          exit;
+
+        paraloc:=result.add_location;
+        { Return in FPU register? }
+        if (result.def.typ=floatdef) and
+           (not ((cs_fp_emulation in current_settings.moduleswitches) or
+                 (current_settings.fputype in [fpu_soft]))) then
+          begin
+            paraloc^.loc:=LOC_FPUREGISTER;
+            paraloc^.register:=NR_FPU_RESULT_REG;
+            paraloc^.size:=retcgsize;
+            paraloc^.def:=result.def;
+          end
+        else
+         { Return in register }
+          begin
+            if retcgsize in [OS_64,OS_S64] then
+             begin
+               { low 32bits }
+               paraloc^.loc:=LOC_REGISTER;
+               if side=callerside then
+                 paraloc^.register:=NR_A3
+               else
+                 paraloc^.register:=NR_A3;
+               paraloc^.size:=OS_32;
+               paraloc^.def:=u32inttype;
+               { high 32bits }
+               paraloc:=result.add_location;
+               paraloc^.loc:=LOC_REGISTER;
+               if side=callerside then
+                 paraloc^.register:=NR_A2
+               else
+                 paraloc^.register:=NR_A2;
+               paraloc^.size:=OS_32;
+               paraloc^.def:=u32inttype;
+             end
+            else
+             begin
+               paraloc^.loc:=LOC_REGISTER;
+               if side=callerside then
+                 paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
+               else
+                 paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
+               paraloc^.size:=retcgsize;
+               paraloc^.def:=result.def;
+             end;
+          end;
+      end;
+
+
+    function tcpuparamanager.create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;
+
+      var
+        cur_stack_offset: aword;
+        curintreg: tsuperregister;
+      begin
+        init_values(curintreg,cur_stack_offset);
+
+        result := create_paraloc_info_intern(p,side,p.paras,curintreg,cur_stack_offset,false);
+
+        create_funcretloc_info(p,side);
+      end;
+
+
+
+    function tcpuparamanager.create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras:tparalist;
+      var curintreg: tsuperregister; var cur_stack_offset: aword; varargsparas: boolean):longint;
+      var
+         stack_offset: longint;
+         paralen: aint;
+         nextintreg : tsuperregister;
+         locdef,
+         fdef,
+         paradef : tdef;
+         paraloc : pcgparalocation;
+         i  : integer;
+         hp : tparavarsym;
+         loc : tcgloc;
+         paracgsize: tcgsize;
+         firstparaloc: boolean;
+
+      begin
+{$ifdef extdebug}
+         if po_explicitparaloc in p.procoptions then
+           internalerror(200411141);
+{$endif extdebug}
+
+         result:=0;
+         nextintreg := curintreg;
+         stack_offset := cur_stack_offset;
+
+          for i:=0 to paras.count-1 do
+            begin
+              hp:=tparavarsym(paras[i]);
+              paradef := hp.vardef;
+
+              hp.paraloc[side].reset;
+              { currently only support C-style array of const }
+              if (p.proccalloption in cstylearrayofconst) and
+                 is_array_of_const(paradef) then
+                begin
+                  paraloc:=hp.paraloc[side].add_location;
+                  { hack: the paraloc must be valid, but is not actually used }
+                  paraloc^.loc := LOC_REGISTER;
+                  paraloc^.register := NR_A2;
+                  paraloc^.size := OS_ADDR;
+                  paraloc^.def:=voidpointertype;
+                  break;
+                end;
+
+              if push_addr_param(hp.varspez,paradef,p.proccalloption) then
+                begin
+                  paradef:=cpointerdef.getreusable_no_free(paradef);
+                  loc:=LOC_REGISTER;
+                  paracgsize := OS_ADDR;
+                  paralen := tcgsize2size[OS_ADDR];
+                end
+              else
+                begin
+                  if not is_special_array(paradef) then
+                    paralen := paradef.size
+                  else
+                    paralen := tcgsize2size[def_cgsize(paradef)];
+                  paracgsize:=def_cgsize(paradef);
+                  { for things like formaldef }
+                  if (paracgsize=OS_NO) then
+                    begin
+                      paracgsize:=OS_ADDR;
+                      paralen := tcgsize2size[OS_ADDR];
+                    end;
+                end;
+
+              loc := getparaloc(paradef);
+
+              hp.paraloc[side].alignment:=std_param_align;
+              hp.paraloc[side].size:=paracgsize;
+              hp.paraloc[side].intsize:=paralen;
+              hp.paraloc[side].def:=paradef;
+              if (is_64bit(paradef)) and
+                 odd(nextintreg-RS_A2) then
+                inc(nextintreg);
+              if (paralen = 0) then
+                if (paradef.typ = recorddef) then
+                  begin
+                    paraloc:=hp.paraloc[side].add_location;
+                    paraloc^.loc := LOC_VOID;
+                  end
+                else
+                  internalerror(2005011310);
+              locdef:=paradef;
+              firstparaloc:=true;
+              { can become < 0 for e.g. 3-byte records }
+              while (paralen > 0) do
+                begin
+                  paraloc:=hp.paraloc[side].add_location;
+                  { In case of po_delphi_nested_cc, the parent frame pointer
+                    is always passed on the stack. }
+                  if (loc = LOC_REGISTER) and
+                     (nextintreg <= RS_A7) and
+                     (not(vo_is_parentfp in hp.varoptions) or
+                      not(po_delphi_nested_cc in p.procoptions)) then
+                    begin
+                      paraloc^.loc := loc;
+                      { make sure we don't lose whether or not the type is signed }
+                      if (paradef.typ<>orddef) then
+                        begin
+                          paracgsize:=int_cgsize(paralen);
+                          locdef:=get_paraloc_def(paradef,paralen,firstparaloc);
+                        end;
+                      if (paracgsize in [OS_NO,OS_64,OS_S64,OS_128,OS_S128]) then
+                        begin
+                          paraloc^.size:=OS_INT;
+                          paraloc^.def:=u32inttype;
+                        end
+                      else
+                        begin
+                          paraloc^.size:=paracgsize;
+                          paraloc^.def:=locdef;
+                        end;
+                      { aix requires that record data stored in parameter
+                        registers is left-aligned }
+                      if (target_info.system in systems_aix) and
+                         (paradef.typ = recorddef) and
+                         (paralen < sizeof(aint)) then
+                        begin
+                          paraloc^.shiftval := (sizeof(aint)-paralen)*(-8);
+                          paraloc^.size := OS_INT;
+                          paraloc^.def := u32inttype;
+                        end;
+                      paraloc^.register:=newreg(R_INTREGISTER,nextintreg,R_SUBNONE);
+                      inc(nextintreg);
+                      dec(paralen,tcgsize2size[paraloc^.size]);
+                    end
+                  else if (loc = LOC_FPUREGISTER) and
+                          (nextintreg <= RS_A7) then
+                    begin
+                      paraloc^.loc:=loc;
+                      paraloc^.size := paracgsize;
+                      paraloc^.def := paradef;
+                      paraloc^.register:=newreg(R_FPUREGISTER,nextintreg,R_SUBWHOLE);
+                      inc(nextintreg);
+                      dec(paralen,tcgsize2size[paraloc^.size]);
+                    end
+                  else { LOC_REFERENCE }
+                    begin
+                       paraloc^.loc:=LOC_REFERENCE;
+                       case loc of
+                         LOC_FPUREGISTER:
+                           begin
+                             paraloc^.size:=int_float_cgsize(paralen);
+                             case paraloc^.size of
+                               OS_F32: paraloc^.def:=s32floattype;
+                               OS_F64: paraloc^.def:=s64floattype;
+                               else
+                                 internalerror(2013060124);
+                             end;
+                           end;
+                         LOC_REGISTER,
+                         LOC_REFERENCE:
+                           begin
+                             paraloc^.size:=int_cgsize(paralen);
+                             if paraloc^.size<>OS_NO then
+                               paraloc^.def:=cgsize_orddef(paraloc^.size)
+                             else
+                               paraloc^.def:=carraydef.getreusable_no_free(u8inttype,paralen);
+                           end;
+                         else
+                           internalerror(2006011101);
+                       end;
+                       if (side = callerside) then
+                         paraloc^.reference.index:=NR_STACK_POINTER_REG
+                       else
+                         begin
+                           paraloc^.reference.index:=NR_FRAME_POINTER_REG;
+
+                           { create_paraloc_info_intern might be also called when being outside of
+                             code generation so current_procinfo might be not set }
+                           if assigned(current_procinfo) then
+                             txtensaprocinfo(current_procinfo).needs_frame_pointer := true;
+                         end;
+
+                       paraloc^.reference.offset:=stack_offset;
+
+                       inc(stack_offset,align(paralen,4));
+                       while (paralen > 0) and
+                             (nextintreg < RS_A7) do
+                          begin
+                            inc(nextintreg);
+                            dec(paralen,sizeof(pint));
+                          end;
+                       paralen := 0;
+                    end;
+                  firstparaloc:=false;
+                end;
+            end;
+         curintreg:=nextintreg;
+         cur_stack_offset:=stack_offset;
+         result:=stack_offset;
+      end;
+
+
+    function tcpuparamanager.create_varargs_paraloc_info(p : tabstractprocdef; side: tcallercallee; varargspara:tvarargsparalist):longint;
+      var
+        cur_stack_offset: aword;
+        parasize, l: longint;
+        curintreg, firstfloatreg: tsuperregister;
+        i : integer;
+        hp: tparavarsym;
+        paraloc: pcgparalocation;
+      begin
+        init_values(curintreg,cur_stack_offset);
+
+        result:=create_paraloc_info_intern(p,side,p.paras,curintreg,cur_stack_offset, false);
+        if (p.proccalloption in cstylearrayofconst) then
+          { just continue loading the parameters in the registers }
+          begin
+            if assigned(varargspara) then
+              begin
+                if side=callerside then
+                  result:=create_paraloc_info_intern(p,side,varargspara,curintreg,cur_stack_offset,true)
+                else
+                  internalerror(2020030704);
+              end;
+           end
+        else
+          internalerror(2020030703);
+        create_funcretloc_info(p,side);
+      end;
+
+begin
+   paramanager:=tcpuparamanager.create;
+end.

+ 124 - 0
compiler/xtensa/cpupi.pas

@@ -0,0 +1,124 @@
+{
+    Copyright (c) 2002 by Florian Klaempfl
+
+    This unit contains the CPU specific part of tprocinfo
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+
+{ This unit contains the CPU specific part of tprocinfo. }
+unit cpupi;
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+       cutils,globtype,
+       cgbase,aasmdata,
+       procinfo,cpuinfo,psub;
+
+    type
+      txtensaprocinfo = class(tcgprocinfo)
+          { for arm thumb, we need to know the stackframe size before
+            starting procedure compilation, so this contains the stack frame size, the compiler
+            should assume
+            if this size is too little the procedure must be compiled again with a larger value }
+          stackframesize,
+          floatregstart : aint;
+          stackpaddingreg: TSuperRegister;
+
+          needs_frame_pointer: boolean;
+          // procedure handle_body_start;override;
+          // procedure after_pass1;override;            
+          constructor create(aparent: tprocinfo); override;
+          procedure set_first_temp_offset;override;
+          function calc_stackframe_size:longint;override;
+      end;
+
+
+  implementation
+
+    uses
+       globals,systems,
+       cpubase,
+       tgobj,
+       symconst,symtype,symsym,symcpu,paramgr,
+       cgutils,
+       cgobj,
+       defutil,
+       aasmcpu;     
+
+    constructor txtensaprocinfo.create(aparent: tprocinfo);
+      begin
+        inherited create(aparent);
+        maxpushedparasize := 0;
+      end;
+
+
+    procedure txtensaprocinfo.set_first_temp_offset;
+      begin
+        if (po_nostackframe in procdef.procoptions) then
+          begin
+             { maxpushedparasize sghould be zero,
+               if not we will get an error later. }
+             tg.setfirsttemp(maxpushedparasize);
+             exit;
+          end;
+
+        if tg.direction = -1 then
+          tg.setfirsttemp(-(1+12)*4)
+        else
+          tg.setfirsttemp(maxpushedparasize);
+      end;
+
+
+    function txtensaprocinfo.calc_stackframe_size:longint;
+      var
+         firstfloatreg,lastfloatreg,
+         r : byte;
+         floatsavesize : aword;
+         regs: tcpuregisterset;
+      begin
+        maxpushedparasize:=align(maxpushedparasize,max(current_settings.alignment.localalignmin,4));
+        floatsavesize:=0;
+        //case current_settings.fputype of
+        //  fpu_fd:
+        //    begin
+        //      floatsavesize:=0;
+        //      regs:=cg.rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
+        //      for r:=RS_F0 to RS_F31 do
+        //        if r in regs then
+        //          inc(floatsavesize,8);
+        //    end;
+        //  else
+        //    ;
+        //end;
+        floatsavesize:=align(floatsavesize,max(current_settings.alignment.localalignmin,4));
+        result:=Align(tg.direction*tg.lasttemp,max(current_settings.alignment.localalignmin,4))+maxpushedparasize+aint(floatsavesize);
+
+        if tg.direction=1 then
+          floatregstart:=result-aint(floatsavesize)
+        else
+          floatregstart:=-result+maxpushedparasize;
+      end;
+
+
+begin
+   cprocinfo:=txtensaprocinfo;
+end.
+

+ 67 - 0
compiler/xtensa/cputarg.pas

@@ -0,0 +1,67 @@
+{
+    Copyright (c) 2001-2002 by Peter Vreman
+
+    Includes the AArch64 dependent target units
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit cputarg;
+
+{$i fpcdefs.inc}
+
+interface
+
+
+implementation
+
+    uses
+      systems { prevent a syntax error when nothing is included }
+
+{**************************************
+             Targets
+**************************************}
+
+    {$ifndef NOTARGETEMBEDDED}
+      ,t_embed
+    {$endif}
+
+{**************************************
+             Assemblers
+**************************************}
+
+    {$ifndef NOAGCPUGAS}
+      ,agcpugas
+    {$endif}
+
+{**************************************
+        Assembler Readers
+**************************************}
+
+  {$ifndef NoRaarmgas}
+       ,racpugas
+  {$endif NoRaarmgas}
+
+{**************************************
+             Debuginfo
+**************************************}
+
+  {$ifndef NoDbgDwarf}
+      ,dbgdwarf
+  {$endif NoDbgDwarf}
+      ;
+
+end.

+ 67 - 0
compiler/xtensa/hlcgcpu.pas

@@ -0,0 +1,67 @@
+{
+    Copyright (c) 1998-2010 by Florian Klaempfl and Jonas Maebe
+    Member of the Free Pascal development team
+
+    This unit contains routines to create a pass-through high-level code
+    generator. This is used by most regular code generators.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+
+unit hlcgcpu;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    symtype,
+    aasmdata,
+    symdef,
+    cgbase,cgutils,
+    hlcgobj, hlcg2ll;
+
+  type
+    thlcgxtensa = class(thlcg2ll)
+      procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
+    end;
+
+implementation
+
+  uses
+    verbose,globtype,fmodule,
+    aasmbase,aasmtai,
+    symconst,symsym,defutil,
+    cpubase,aasmcpu,parabase,
+    cgobj,cgcpu;
+
+  procedure create_hlcodegen_cpu;
+    begin
+      hlcg:=thlcgxtensa.create;
+      create_codegen;
+    end;
+
+
+  procedure thlcgxtensa.g_intf_wrapper(list : TAsmList; procdef : tprocdef;
+   const labelname : string; ioffset : longint);
+    begin
+    end;
+
+begin
+  chlcgobj:=thlcgxtensa;
+  create_hlcodegen:=@create_hlcodegen_cpu;
+end.

+ 93 - 0
compiler/xtensa/itcpugas.pas

@@ -0,0 +1,93 @@
+{
+    Copyright (c) 1998-2002 by Florian Klaempfl
+
+    This unit contains the Xtensa GAS instruction tables
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit itcpugas;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    cpubase,cgbase;
+
+
+  const
+    { Standard opcode string table (for each tasmop enumeration). The
+      opcode strings should conform to the names as defined by the
+      processor manufacturer.
+    }
+    gas_op2str : op2strtable = {$i xtensaatt.inc}
+
+    function gas_regnum_search(const s:string):Tregister;
+    function gas_regname(r:Tregister):string;
+
+
+implementation
+
+    uses
+      rgbase;
+
+    const
+      gas_regname_table : TRegNameTable = (
+        {$i rxtensastd.inc}
+      );
+
+      gas_regname_index : array[tregisterindex] of tregisterindex = (
+        {$i rxtensasri.inc}
+      );
+
+    function findreg_by_gasname(const s:string):tregisterindex;
+      var
+        i,p : tregisterindex;
+      begin
+        {Binary search.}
+        p:=0;
+        i:=regnumber_count_bsstart;
+        repeat
+          if (p+i<=high(tregisterindex)) and (gas_regname_table[gas_regname_index[p+i]]<=s) then
+            p:=p+i;
+          i:=i shr 1;
+        until i=0;
+        if gas_regname_table[gas_regname_index[p]]=s then
+          findreg_by_gasname:=gas_regname_index[p]
+        else
+          findreg_by_gasname:=0;
+      end;
+
+
+    function gas_regnum_search(const s:string):Tregister;
+      begin
+        result:=regnumber_table[findreg_by_gasname(s)];
+      end;
+
+
+    function gas_regname(r:Tregister):string;
+      var
+        p : tregisterindex;
+      begin
+        p:=findreg_by_number(r);
+        if p<>0 then
+          result:=gas_regname_table[p]
+        else
+          result:=generic_regname(r);
+      end;
+
+end.

+ 1035 - 0
compiler/xtensa/racpugas.pas

@@ -0,0 +1,1035 @@
+{
+    Copyright (c) 1998-2002 by Carl Eric Codere and Peter Vreman
+
+    Does the parsing for the Xtensa GNU AS styled inline assembler.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+Unit racpugas;
+
+{$i fpcdefs.inc}
+
+  Interface
+
+    uses
+      raatt,raxtensa,
+      cpubase;
+
+    type
+
+      txtensaattreader = class(tattreader)
+       function is_asmopcode(const s: string):boolean;override;
+        function is_register(const s:string):boolean;override;
+//        function is_targetdirective(const s: string): boolean; override;
+        procedure handleopcode;override;
+        procedure BuildReference(oper : TXtensaOperand);
+        procedure BuildOperand(oper : TXtensaOperand);
+        procedure BuildSpecialreg(oper : TXtensaOperand);
+        procedure BuildOpCode(instr : TXtensaInstruction);
+        procedure ReadSym(oper : TXtensaOperand);
+        procedure ConvertCalljmp(instr : TXtensaInstruction);
+        procedure HandleTargetDirective; override;
+      end;
+
+  Implementation
+
+    uses
+      { helpers }
+      cutils,
+      { global }
+      globtype,globals,verbose,
+      systems,aasmbase,aasmtai,aasmdata,aasmcpu,
+      { symtable }
+      symconst,symsym,symdef,
+      procinfo,
+      rabase,rautils,
+      cgbase,cgutils,paramgr;
+
+
+    function txtensaattreader.is_register(const s:string):boolean;
+      type
+        treg2str = record
+          name : string[3];
+          reg : tregister;
+        end;
+
+      const
+        extraregs : array[0..0] of treg2str = (
+          (name: 'A1'; reg : NR_A0)
+          );
+
+      var
+        i : longint;
+
+      begin
+        result:=inherited is_register(s);
+        { reg found?
+          possible aliases are always 2 char
+        }
+        if result or (not (length(s) in [2,3])) then
+          exit;
+        for i:=low(extraregs) to high(extraregs) do
+          begin
+            if s=extraregs[i].name then
+              begin
+                actasmregister:=extraregs[i].reg;
+                result:=true;
+                actasmtoken:=AS_REGISTER;
+                exit;
+              end;
+          end;
+      end;
+
+
+    procedure txtensaattreader.ReadSym(oper : TXtensaOperand);
+      var
+         tempstr, mangledname : string;
+         typesize,l,k : tcgint;
+      begin
+        tempstr:=actasmpattern;
+        Consume(AS_ID);
+        { typecasting? }
+        if (actasmtoken=AS_LPAREN) and
+           SearchType(tempstr,typesize) then
+          begin
+            oper.hastype:=true;
+            Consume(AS_LPAREN);
+            BuildOperand(oper);
+            Consume(AS_RPAREN);
+            if oper.opr.typ in [OPR_REFERENCE,OPR_LOCAL] then
+              oper.SetSize(typesize,true);
+          end
+        else
+          if not oper.SetupVar(tempstr,false) then
+            Message1(sym_e_unknown_id,tempstr);
+        { record.field ? }
+        if actasmtoken=AS_DOT then
+          begin
+            BuildRecordOffsetSize(tempstr,l,k,mangledname,false);
+            if (mangledname<>'') then
+              Message(asmr_e_invalid_reference_syntax);
+            inc(oper.opr.ref.offset,l);
+          end;
+      end;
+
+
+    Procedure txtensaattreader.BuildReference(oper : TXtensaOperand);
+
+      procedure do_error;
+        begin
+          Message(asmr_e_invalid_reference_syntax);
+          RecoverConsume(false);
+        end;
+
+
+      procedure test_end(require_rbracket : boolean);
+        begin
+          if require_rbracket then begin
+            if not(actasmtoken=AS_RBRACKET) then
+              begin
+                do_error;
+                exit;
+              end
+            else
+              Consume(AS_RBRACKET);
+          end;
+          if not(actasmtoken in [AS_SEPARATOR,AS_end]) then
+            do_error
+          else
+            begin
+{$IFDEF debugasmreader}
+              writeln('TEST_end_FINAL_OK. Created the following ref:');
+              writeln('oper.opr.ref.shiftimm=',oper.opr.ref.shiftimm);
+              writeln('oper.opr.ref.shiftmode=',ord(oper.opr.ref.shiftmode));
+              writeln('oper.opr.ref.index=',ord(oper.opr.ref.index));
+              writeln('oper.opr.ref.base=',ord(oper.opr.ref.base));
+              writeln('oper.opr.ref.signindex=',ord(oper.opr.ref.signindex));
+              writeln('oper.opr.ref.addressmode=',ord(oper.opr.ref.addressmode));
+              writeln;
+{$endIF debugasmreader}
+            end;
+        end;
+
+
+      function is_shifter_ref_operation(var a : tshiftmode) : boolean;
+        begin
+          a := SM_NONE;
+          if      (actasmpattern='LSL') then
+            a := SM_LSL
+          else if (actasmpattern='LSR') then
+            a := SM_LSR
+          else if (actasmpattern='ASR') then
+            a := SM_ASR
+          else if (actasmpattern='ROR') then
+            a := SM_ROR
+          else if (actasmpattern='RRX') then
+            a := SM_RRX;
+          is_shifter_ref_operation := not(a=SM_NONE);
+        end;
+
+
+      procedure read_index_shift(require_rbracket : boolean);
+        var
+          shift : aint;
+        begin
+          case actasmtoken of
+            AS_COMMA :
+              begin
+                Consume(AS_COMMA);
+                if not(actasmtoken=AS_ID) then
+                  do_error;
+              end;
+            AS_RBRACKET :
+              if require_rbracket then
+                test_end(require_rbracket)
+              else
+                begin
+                  do_error;
+                  exit;
+                end;
+            AS_SEPARATOR,AS_END :
+              if not require_rbracket then
+                test_end(false)
+               else
+                 do_error;
+            else
+              begin
+                do_error;
+                exit;
+              end;
+          end;
+        end;
+
+
+      procedure read_index(require_rbracket : boolean);
+        var
+          recname : string;
+          o_int,s_int : tcgint;
+        begin
+          case actasmtoken of
+            AS_REGISTER :
+              begin
+                oper.opr.ref.index:=actasmregister;
+                Consume(AS_REGISTER);
+                read_index_shift(require_rbracket);
+                exit;
+              end;
+            AS_PLUS,AS_MINUS :
+              begin
+                if actasmtoken=AS_PLUS then
+                  begin
+                    Consume(AS_PLUS);
+                  end;
+                if actasmtoken=AS_REGISTER then
+                  begin
+                    oper.opr.ref.index:=actasmregister;
+                    Consume(AS_REGISTER);
+                    read_index_shift(require_rbracket);
+                    exit;
+                  end
+                else
+                  begin
+                    do_error;
+                    exit;
+                  end;
+                test_end(require_rbracket);
+                exit;
+              end;
+            AS_HASH : // constant
+              begin
+                Consume(AS_HASH);
+                o_int := BuildConstExpression(false,true);
+                if (o_int>4095) or (o_int<-4095) then
+                  begin
+                    Message(asmr_e_constant_out_of_bounds);
+                    RecoverConsume(false);
+                    exit;
+                  end
+                else
+                  begin
+                    inc(oper.opr.ref.offset,o_int);
+                    test_end(require_rbracket);
+                    exit;
+                  end;
+              end;
+            AS_ID :
+              begin
+                recname := actasmpattern;
+                Consume(AS_ID);
+                BuildRecordOffsetSize(recname,o_int,s_int,recname,false);
+                if (o_int>4095)or(o_int<-4095) then
+                  begin
+                    Message(asmr_e_constant_out_of_bounds);
+                    RecoverConsume(false);
+                    exit;
+                  end
+                else
+                  begin
+                    inc(oper.opr.ref.offset,o_int);
+                    test_end(require_rbracket);
+                    exit;
+                  end;
+              end;
+            AS_AT:
+              begin
+                do_error;
+                exit;
+              end;
+            AS_DOT : // local label
+              begin
+                BuildConstExpression(true,false);
+                test_end(require_rbracket);
+                exit;
+              end;
+            AS_RBRACKET :
+              begin
+                if require_rbracket then
+                  begin
+                    test_end(require_rbracket);
+                    exit;
+                  end
+                else
+                  begin
+                    do_error; // unexpected rbracket
+                    exit;
+                  end;
+              end;
+            AS_SEPARATOR,AS_end :
+              begin
+                if not require_rbracket then
+                  begin
+                    test_end(false);
+                    exit;
+                  end
+                else
+                  begin
+                    do_error;
+                    exit;
+                  end;
+              end;
+            else
+              begin
+                // unexpected token
+                do_error;
+                exit;
+              end;
+          end; // case
+        end;
+
+
+      procedure try_prepostindexed;
+        begin
+          Consume(AS_RBRACKET);
+          case actasmtoken of
+            AS_COMMA :
+              begin // post-indexed
+                Consume(AS_COMMA);
+                read_index(false);
+                exit;
+              end;
+            AS_NOT :
+              begin   // pre-indexed
+                Consume(AS_NOT);
+                test_end(false);
+                exit;
+              end;
+            else
+              begin
+                test_end(false);
+                exit;
+              end;
+          end; // case
+        end;
+
+      var
+        lab : TASMLABEL;
+      begin
+        Consume(AS_LBRACKET);
+        if actasmtoken=AS_REGISTER then
+          begin
+            oper.opr.ref.base:=actasmregister;
+            Consume(AS_REGISTER);
+            case actasmtoken of
+              AS_RBRACKET :
+                begin
+                  try_prepostindexed;
+                  exit;
+                end;
+              AS_COMMA :
+                begin
+                  Consume(AS_COMMA);
+                  read_index(true);
+                  exit;
+                end;
+              else
+                begin
+                  Message(asmr_e_invalid_reference_syntax);
+                  RecoverConsume(false);
+                end;
+            end;
+          end
+        else
+{
+  if base isn't a register, r15=PC is implied base, so it must be a local label.
+  pascal constants don't make sense, because implied r15
+  record offsets probably don't make sense, too (a record offset of code?)
+
+  TODO: However, we could make the Stackpointer implied.
+
+}
+
+          Begin
+            case actasmtoken of
+              AS_ID :
+                begin
+                  // TODO: Stackpointer implied,
+                   Message(asmr_e_invalid_reference_syntax);
+                   RecoverConsume(false);
+                   exit;
+                end;
+              else
+                begin
+                  Message(asmr_e_invalid_reference_syntax);
+                  RecoverConsume(false);
+                  exit;
+                end;
+            end;
+          end;
+      end;
+
+
+    Procedure txtensaattreader.BuildOperand(oper : TXtensaOperand);
+      var
+        expr : string;
+        typesize,l : tcgint;
+
+
+        procedure AddLabelOperand(hl:tasmlabel);
+          begin
+            oper.opr.typ:=OPR_SYMBOL;
+            oper.opr.symbol:=hl;
+          end;
+
+
+        procedure MaybeRecordOffset;
+          var
+            mangledname: string;
+            hasdot  : boolean;
+            l,
+            toffset,
+            tsize   : tcgint;
+          begin
+            if not(actasmtoken in [AS_DOT,AS_PLUS,AS_MINUS]) then
+             exit;
+            l:=0;
+            mangledname:='';
+            hasdot:=(actasmtoken=AS_DOT);
+            if hasdot then
+              begin
+                if expr<>'' then
+                  begin
+                    BuildRecordOffsetSize(expr,toffset,tsize,mangledname,false);
+                    if (oper.opr.typ<>OPR_CONSTANT) and
+                       (mangledname<>'') then
+                      Message(asmr_e_wrong_sym_type);
+                    inc(l,toffset);
+                    oper.SetSize(tsize,true);
+                  end;
+              end;
+            if actasmtoken in [AS_PLUS,AS_MINUS] then
+              inc(l,BuildConstExpression(true,false));
+            case oper.opr.typ of
+              OPR_LOCAL :
+                begin
+                  { don't allow direct access to fields of parameters, because that
+                    will generate buggy code. Allow it only for explicit typecasting }
+                  if hasdot and
+                     (not oper.hastype) then
+                     checklocalsubscript(oper.opr.localsym);
+                  inc(oper.opr.localsymofs,l)
+                end;
+              OPR_CONSTANT :
+                inc(oper.opr.val,l);
+              OPR_REFERENCE :
+                if (mangledname<>'') then
+                  begin
+                    if (oper.opr.val<>0) then
+                      Message(asmr_e_wrong_sym_type);
+                    oper.opr.typ:=OPR_SYMBOL;
+                    oper.opr.symbol:=current_asmdata.RefAsmSymbol(mangledname,AT_FUNCTION);
+                  end
+                else
+                  inc(oper.opr.val,l);
+              OPR_SYMBOL:
+                Message(asmr_e_invalid_symbol_ref);
+              else
+                internalerror(200309221);
+            end;
+          end;
+
+
+        function MaybeBuildReference:boolean;
+          { Try to create a reference, if not a reference is found then false
+            is returned }
+          begin
+            MaybeBuildReference:=true;
+            case actasmtoken of
+              AS_INTNUM,
+              AS_MINUS,
+              AS_PLUS:
+                Begin
+                  oper.opr.ref.offset:=BuildConstExpression(True,False);
+                  if actasmtoken<>AS_LPAREN then
+                    Message(asmr_e_invalid_reference_syntax)
+                  else
+                    BuildReference(oper);
+                end;
+              AS_LPAREN:
+                BuildReference(oper);
+              AS_ID: { only a variable is allowed ... }
+                Begin
+                  ReadSym(oper);
+                  case actasmtoken of
+                    AS_end,
+                    AS_SEPARATOR,
+                    AS_COMMA: ;
+                    AS_LPAREN:
+                      BuildReference(oper);
+                  else
+                    Begin
+                      Message(asmr_e_invalid_reference_syntax);
+                      Consume(actasmtoken);
+                    end;
+                  end; {end case }
+                end;
+              else
+               MaybeBuildReference:=false;
+            end; { end case }
+          end;
+
+
+        procedure BuildDirectRef;
+
+          function GetConstLabel(const symname: string; ofs: aint): TAsmLabel;
+            var
+              hp: tai;
+              newconst: tai_const;
+              lab: TAsmLabel;
+            begin
+              if symname<>'' then
+                newconst:=tai_const.Createname(symname,ofs)
+              else
+                newconst:=tai_const.Create_32bit(ofs);
+
+              hp:=tai(current_procinfo.aktlocaldata.First);
+              while assigned(hp) do
+                begin
+                  if hp.typ=ait_const then
+                    begin
+                      if (tai_const(hp).sym=newconst.sym) and
+                         (tai_const(hp).value=newconst.value) and
+                         assigned(hp.Previous) and
+                         (tai(hp.previous).typ=ait_label) then
+                        begin
+                          newconst.Free;
+                          result:=tai_label(hp.Previous).labsym;
+                          exit;
+                        end;
+                    end;
+
+                  hp:=tai(hp.Next);
+                end;
+
+              current_asmdata.getjumplabel(lab);
+              current_procinfo.aktlocaldata.concat(tai_align.create(4));
+              current_procinfo.aktlocaldata.concat(tai_label.create(lab));
+              current_procinfo.aktlocaldata.concat(newconst);
+              result:=lab;
+            end;
+
+          var
+            symtype: TAsmsymtype;
+            sym: string;
+            val: tcgint;
+          begin
+            case actasmtoken of
+              AS_INTNUM,
+              AS_ID:
+                begin
+                  BuildConstSymbolExpression(true,false,false,val,sym,symtype);
+
+                  if symtype=AT_NONE then
+                    sym:='';
+
+                  reference_reset(oper.opr.ref,4,[]);
+                  oper.opr.ref.symbol:=GetConstLabel(sym,val);
+                end;
+              else
+                ;
+            end;
+          end;
+
+
+      function getregsetindex(reg: tregister): integer;
+        begin
+          if getsubreg(reg)=R_SUBFS then
+            begin
+              result:=getsupreg(reg)*2;
+              if result>32 then
+                result:=result-63;
+            end
+          else
+            result:=getsupreg(reg);
+        end;
+
+      var
+        tempreg : tregister;
+        ireg : tsuperregister;
+        regtype: tregistertype;
+        subreg: tsubregister;
+        hl : tasmlabel;
+        {ofs : longint;}
+        registerset : tcpuregisterset;
+      Begin
+        expr:='';
+        case actasmtoken of
+          AS_LBRACKET: { Memory reference or constant expression }
+            Begin
+              oper.InitRef;
+              BuildReference(oper);
+            end;
+
+          AS_HASH: { Constant expression  }
+            Begin
+              Consume(AS_HASH);
+              BuildConstantOperand(oper);
+            end;
+          AS_ID: { A constant expression, or a Variable ref.  }
+            Begin
+              if is_locallabel(actasmpattern) then
+               begin
+                 CreateLocalLabel(actasmpattern,hl,false);
+                 Consume(AS_ID);
+                 AddLabelOperand(hl);
+               end
+              else
+               { Check for label }
+               if SearchLabel(actasmpattern,hl,false) then
+                begin
+                  Consume(AS_ID);
+                  AddLabelOperand(hl);
+                end
+              else
+               { probably a variable or normal expression }
+               { or a procedure (such as in CALL ID)      }
+               Begin
+                 { is it a constant ? }
+                 if SearchIConstant(actasmpattern,l) then
+                  Begin
+                    if not (oper.opr.typ in [OPR_NONE,OPR_CONSTANT]) then
+                     Message(asmr_e_invalid_operand_type);
+                    BuildConstantOperand(oper);
+                  end
+                 else
+                  begin
+                    expr:=actasmpattern;
+                    Consume(AS_ID);
+                    { typecasting? }
+                    if (actasmtoken=AS_LPAREN) and
+                       SearchType(expr,typesize) then
+                     begin
+                       oper.hastype:=true;
+                       Consume(AS_LPAREN);
+                       BuildOperand(oper);
+                       Consume(AS_RPAREN);
+                       if oper.opr.typ in [OPR_REFERENCE,OPR_LOCAL] then
+                         oper.SetSize(typesize,true);
+                     end
+                    else
+                     begin
+                       if not(oper.SetupVar(expr,false)) then
+                        Begin
+                          { look for special symbols ... }
+                          if expr= '__HIGH' then
+                            begin
+                              consume(AS_LPAREN);
+                              if not oper.setupvar('high'+actasmpattern,false) then
+                                Message1(sym_e_unknown_id,'high'+actasmpattern);
+                              consume(AS_ID);
+                              consume(AS_RPAREN);
+                            end
+                          else
+                           if expr = '__RESULT' then
+                            oper.SetUpResult
+                          else
+                           if expr = '__SELF' then
+                            oper.SetupSelf
+                          else
+                           if expr = '__OLDEBP' then
+                            oper.SetupOldEBP
+                          else
+                            Message1(sym_e_unknown_id,expr);
+                        end;
+                     end;
+                  end;
+                  if actasmtoken=AS_DOT then
+                    MaybeRecordOffset;
+                  { add a constant expression? }
+                  if (actasmtoken=AS_PLUS) then
+                   begin
+                     l:=BuildConstExpression(true,false);
+                     case oper.opr.typ of
+                       OPR_CONSTANT :
+                         inc(oper.opr.val,l);
+                       OPR_LOCAL :
+                         inc(oper.opr.localsymofs,l);
+                       OPR_REFERENCE :
+                         inc(oper.opr.ref.offset,l);
+                       else
+                         internalerror(200309202);
+                     end;
+                   end
+               end;
+              { Do we have a indexing reference, then parse it also }
+              if actasmtoken=AS_LPAREN then
+                BuildReference(oper);
+            end;
+
+          { Register, a variable reference or a constant reference  }
+          AS_REGISTER:
+            Begin
+              { save the type of register used. }
+              tempreg:=actasmregister;
+              Consume(AS_REGISTER);
+              if (actasmtoken in [AS_end,AS_SEPARATOR,AS_COMMA]) then
+                Begin
+                  if not (oper.opr.typ in [OPR_NONE,OPR_REGISTER]) then
+                    Message(asmr_e_invalid_operand_type);
+                  oper.opr.typ:=OPR_REGISTER;
+                  oper.opr.reg:=tempreg;
+                end
+              else
+                Message(asmr_e_syn_operand);
+            end;
+
+          AS_end,
+          AS_SEPARATOR,
+          AS_COMMA: ;
+        else
+          Begin
+            Message(asmr_e_syn_operand);
+            Consume(actasmtoken);
+          end;
+        end; { end case }
+      end;
+
+    procedure txtensaattreader.BuildSpecialreg(oper: TXtensaOperand);
+      var
+        hs, reg : String;
+        ch : char;
+        i, t : longint;
+        hreg : tregister;
+        flags : tspecialregflags;
+      begin
+        hreg:=NR_NO;
+        case actasmtoken of
+          AS_REGISTER:
+            begin
+              oper.opr.typ:=OPR_REGISTER;
+              oper.opr.reg:=actasmregister;
+              Consume(AS_REGISTER);
+            end;
+          else
+            Message(asmr_e_invalid_operand_type);
+        end;
+      end;
+
+
+{*****************************************************************************
+                                tarmattreader
+*****************************************************************************}
+
+    procedure txtensaattreader.BuildOpCode(instr : TXtensaInstruction);
+      var
+        operandnum : longint;
+      Begin
+        { opcode }
+        if (actasmtoken<>AS_OPCODE) then
+         Begin
+           Message(asmr_e_invalid_or_missing_opcode);
+           RecoverConsume(true);
+           exit;
+         end;
+        { Fill the instr object with the current state }
+        with instr do
+          begin
+            Opcode:=ActOpcode;
+            condition:=ActCondition;
+          end;
+
+        { We are reading operands, so opcode will be an AS_ID }
+        operandnum:=1;
+        Consume(AS_OPCODE);
+        { Zero operand opcode ?  }
+        if actasmtoken in [AS_SEPARATOR,AS_end] then
+         begin
+           operandnum:=0;
+           exit;
+         end;
+        { Read the operands }
+        repeat
+          case actasmtoken of
+            AS_COMMA: { Operand delimiter }
+              Begin
+                if operandnum>Max_Operands then
+                  Message(asmr_e_too_many_operands)
+                else
+                  Inc(operandnum);
+                Consume(AS_COMMA);
+              end;
+            AS_SEPARATOR,
+            AS_end : { End of asm operands for this opcode  }
+              begin
+                break;
+              end;
+          else
+            BuildOperand(instr.Operands[operandnum] as TXtensaOperand);
+          end; { end case }
+        until false;
+        instr.Ops:=operandnum;
+      end;
+
+
+    function txtensaattreader.is_asmopcode(const s: string):boolean;
+
+      //const
+      //  { sorted by length so longer postfixes will match first }
+      //  postfix2strsorted : array[1..70] of string[9] = (
+      //    '.F32.S32','.F32.U32','.S32.F32','.U32.F32','.F64.S32','.F64.U32','.S32.F64','.U32.F64',
+      //    '.F32.S16','.F32.U16','.S16.F32','.U16.F32','.F64.S16','.F64.U16','.S16.F64','.U16.F64',
+      //    '.F32.F64','.F64.F32',
+      //    '.I16','.I32','.I64','.S16','.S32','.S64','.U16','.U32','.U64','.F32','.F64',
+      //    'IAD','DBD','FDD','EAD','IAS','DBS','FDS','EAS','IAX','DBX','FDX','EAX',
+      //    '.16','.32','.64','.I8','.S8','.U8','.P8',
+      //    'EP','SB','BT','SH','IA','IB','DA','DB','FD','FA','ED','EA',
+      //    '.8','S','D','E','P','X','R','B','H','T');
+      //
+      //  postfixsorted : array[1..70] of TOpPostfix = (
+      //    PF_F32S32,PF_F32U32,PF_S32F32,PF_U32F32,PF_F64S32,PF_F64U32,PF_S32F64,PF_U32F64,
+      //    PF_F32S16,PF_F32U16,PF_S16F32,PF_U16F32,PF_F64S16,PF_F64U16,PF_S16F64,PF_U16F64,
+      //    PF_F32F64,PF_F64F32,
+      //    PF_I16,PF_I32,
+      //    PF_I64,PF_S16,PF_S32,PF_S64,PF_U16,PF_U32,PF_U64,PF_F32,
+      //    PF_F64,PF_IAD,PF_DBD,PF_FDD,PF_EAD,
+      //    PF_IAS,PF_DBS,PF_FDS,PF_EAS,PF_IAX,
+      //    PF_DBX,PF_FDX,PF_EAX,PF_16,PF_32,
+      //    PF_64,PF_I8,PF_S8,PF_U8,PF_P8,
+      //    PF_EP,PF_SB,PF_BT,PF_SH,PF_IA,
+      //    PF_IB,PF_DA,PF_DB,PF_FD,PF_FA,
+      //    PF_ED,PF_EA,PF_8,PF_S,PF_D,PF_E,
+      //    PF_P,PF_X,PF_R,PF_B,PF_H,PF_T);
+
+      var
+        j, j2 : longint;
+        hs,hs2 : string;
+        maxlen : longint;
+        icond : tasmcond;
+      Begin
+        { making s a value parameter would break other assembler readers }
+        hs:=s;
+        is_asmopcode:=false;
+
+        { clear op code }
+        actopcode:=A_None;
+
+        actcondition:=C_None;
+
+        { first, handle B else BLS is read wrong }
+        if ((hs[1]='J') and (length(hs)=3)) then
+          begin
+            for icond:=low(tasmcond) to high(tasmcond) do
+              begin
+                if copy(hs,2,3)=uppercond2str[icond] then
+                  begin
+                    actopcode:=A_J;
+                    actasmtoken:=AS_OPCODE;
+                    actcondition:=icond;
+                    is_asmopcode:=true;
+                    exit;
+                  end;
+              end;
+          end;
+        maxlen:=min(length(hs),6);
+        actopcode:=A_NONE;
+        j2:=maxlen;
+        hs2:=hs;
+        while j2>=1 do
+          begin
+            hs:=hs2;
+            while j2>=1 do
+              begin
+                actopcode:=tasmop(PtrUInt(iasmops.Find(copy(hs,1,j2))));
+                if actopcode<>A_NONE then
+                  begin
+                    actasmtoken:=AS_OPCODE;
+                    { strip op code }
+                    delete(hs,1,j2);
+                    dec(j2);
+                    break;
+                  end;
+                dec(j2);
+              end;
+
+            if actopcode=A_NONE then
+              exit;
+
+              begin
+                { search for condition, conditions are always 2 chars }
+                if length(hs)>1 then
+                  begin
+                    for icond:=low(tasmcond) to high(tasmcond) do
+                      begin
+                        if copy(hs,1,2)=uppercond2str[icond] then
+                          begin
+                            actcondition:=icond;
+                            { strip condition }
+                            delete(hs,1,2);
+                            break;
+                          end;
+                      end;
+                  end;
+                { check for postfix }
+                //if (length(hs)>0) and (actoppostfix=PF_None) then
+                //  begin
+                //    for j:=low(postfixsorted) to high(postfixsorted) do
+                //      begin
+                //        if copy(hs,1,length(postfix2strsorted[j]))=postfix2strsorted[j] then
+                //          begin
+                //            actoppostfix:=postfixsorted[j];
+                //            { strip postfix }
+                //            delete(hs,1,length(postfix2strsorted[j]));
+                //            break;
+                //          end;
+                //      end;
+                //  end;
+              end;
+            { check for format postfix }
+            //if length(hs)>0 then
+            //  begin
+            //    if copy(hs,1,2) = '.W' then
+            //      begin
+            //        actwideformat:=true;
+            //        delete(hs,1,2);
+            //      end;
+            //  end;
+            { if we stripped all postfixes, it's a valid opcode }
+            is_asmopcode:=length(hs)=0;
+            if is_asmopcode = true then
+              break;
+          end;
+      end;
+
+
+    procedure txtensaattreader.ConvertCalljmp(instr : TXtensaInstruction);
+      var
+        newopr : toprrec;
+      begin
+        if instr.Operands[1].opr.typ=OPR_REFERENCE then
+          begin
+            newopr.typ:=OPR_SYMBOL;
+            newopr.symbol:=instr.Operands[1].opr.ref.symbol;
+            newopr.symofs:=instr.Operands[1].opr.ref.offset;
+            if (instr.Operands[1].opr.ref.base<>NR_NO) or
+              (instr.Operands[1].opr.ref.index<>NR_NO) then
+              Message(asmr_e_syn_operand);
+            instr.Operands[1].opr:=newopr;
+          end;
+      end;
+
+
+    procedure txtensaattreader.HandleTargetDirective;
+      var
+        symname,
+        symval  : String;
+        val     : tcgint;
+        symtyp  : TAsmsymtype;
+      begin
+        case actasmpattern of
+          '.thumb_set':
+            begin
+              consume(AS_TARGET_DIRECTIVE);
+              BuildConstSymbolExpression(true,false,false, val,symname,symtyp);
+              Consume(AS_COMMA);
+              BuildConstSymbolExpression(true,false,false, val,symval,symtyp);
+
+              curList.concat(tai_symbolpair.create(spk_thumb_set,symname,symval));
+            end;
+          '.code':
+            begin
+              consume(AS_TARGET_DIRECTIVE);
+              val:=BuildConstExpression(false,false);
+              if not(val in [16,32]) then
+                Message(asmr_e_invalid_code_value);
+              curList.concat(tai_directive.create(asd_code,tostr(val)));
+            end;
+          '.thumb_func':
+            begin
+              consume(AS_TARGET_DIRECTIVE);
+              curList.concat(tai_directive.create(asd_thumb_func,''));
+            end
+          else
+            inherited HandleTargetDirective;
+        end;
+      end;
+
+
+    procedure txtensaattreader.handleopcode;
+      var
+        instr : TXtensaInstruction;
+      begin
+        instr:=TXtensaInstruction.Create(TXtensaOperand);
+        BuildOpcode(instr);
+        if is_calljmp(instr.opcode) then
+          ConvertCalljmp(instr);
+        {
+        instr.AddReferenceSizes;
+        instr.SetInstructionOpsize;
+        instr.CheckOperandSizes;
+        }
+        instr.ConcatInstruction(curlist);
+        instr.Free;
+//        actoppostfix:=PF_None;
+//        actwideformat:=false;
+      end;
+
+
+{*****************************************************************************
+                                     Initialize
+*****************************************************************************}
+
+const
+  asmmode_xtensa_att_info : tasmmodeinfo =
+          (
+            id    : asmmode_arm_gas;
+            idtxt : 'DIVIDED';
+            casmreader : txtensaattreader;
+          );
+
+initialization
+  RegisterAsmMode(asmmode_xtensa_att_info);
+end.
+

+ 45 - 0
compiler/xtensa/raxtensa.pas

@@ -0,0 +1,45 @@
+{
+    Copyright (c) 1998-2003 by Carl Eric Codere and Peter Vreman
+
+    Handles the common Xtensa assembler reader routines
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit raxtensa;
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+      cpubase,
+      aasmtai,aasmdata,
+      rautils;
+
+    type
+      TXtensaOperand=class(TOperand)
+      end;
+
+      TXtensaInstruction=class(TInstruction)
+      end;
+
+  implementation
+
+    uses
+      aasmcpu;
+
+end.

+ 114 - 0
compiler/xtensa/rgcpu.pas

@@ -0,0 +1,114 @@
+{
+    Copyright (c) 1998-2002 by Florian Klaempfl
+
+    This unit implements the Xtensa specific class for the register
+    allocator
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************}
+unit rgcpu;
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+      aasmbase,aasmcpu,aasmtai,aasmdata,
+      cgbase,cgutils,
+      cpubase,
+      globtype,
+      rgobj;
+
+    type
+      trgcpu=class(trgobj)
+        procedure do_spill_read(list: TAsmList; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister); override;
+        procedure do_spill_written(list: TAsmList; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister); override;
+       protected
+        procedure do_spill_op(list: tasmlist; op: tasmop; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister);
+      end;
+
+      trgintcpu=class(trgcpu)
+        procedure add_cpu_interferences(p: tai); override;
+      end;
+
+
+implementation
+
+    uses
+      verbose,cutils,
+      cgobj;
+
+    procedure trgcpu.do_spill_read(list: TAsmList; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister);
+      begin
+        do_spill_op(list,A_L32I,pos,spilltemp,tempreg,orgsupreg);
+      end;
+
+
+    procedure trgcpu.do_spill_written(list: TAsmList; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister);
+      begin
+        do_spill_op(list,A_S32I,pos,spilltemp,tempreg,orgsupreg);
+      end;
+
+
+    procedure trgcpu.do_spill_op(list: tasmlist; op: tasmop; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister);
+      var
+        helpins  : tai;
+        tmpref   : treference;
+        helplist : TAsmList;
+        hreg     : tregister;
+        isload   : boolean;
+      begin
+        isload:=op in [A_L32I,A_LSI];
+        //{ offset out of range for regular load/store? }
+        //if simple_ref_type(op,reg_cgsize(tempreg),PF_None,spilltemp)<>sr_simple then
+        //  begin
+        //    helplist:=TAsmList.create;
+        //
+        //    if getregtype(tempreg)=R_INTREGISTER then
+        //      hreg:=tempreg
+        //    else
+        //      hreg:=cg.getaddressregister(helplist);
+        //
+        //    cg.a_load_const_reg(helplist,OS_ADDR,spilltemp.offset,hreg);
+        //    reference_reset_base(tmpref,spilltemp.base,0,spilltemp.temppos,sizeof(pint),[]);
+        //    tmpref.index:=hreg;
+        //    if isload then
+        //      helpins:=spilling_create_load(tmpref,tempreg)
+        //    else
+        //      helpins:=spilling_create_store(tempreg,tmpref);
+        //    helplist.concat(helpins);
+        //    add_cpu_interferences(helpins);
+        //    list.insertlistafter(pos,helplist);
+        //    helplist.free;
+        //  end
+        //else
+        if isload then
+          inherited do_spill_read(list,pos,spilltemp,tempreg,orgsupreg)
+        else
+          inherited do_spill_written(list,pos,spilltemp,tempreg,orgsupreg)
+      end;
+
+
+    procedure trgintcpu.add_cpu_interferences(p: tai);
+     var
+       i, j: longint;
+     begin
+       if p.typ=ait_instruction then
+         begin
+         end;
+     end;
+
+end.

+ 216 - 0
compiler/xtensa/symcpu.pas

@@ -0,0 +1,216 @@
+{
+    Copyright (c) 2014 by Florian Klaempfl
+
+    Symbol table overrides for Xtensa
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit symcpu;
+
+{$i fpcdefs.inc}
+
+interface
+
+uses
+  symconst,symtype,symdef,symsym;
+
+type
+  { defs }
+  tcpufiledef = class(tfiledef)
+  end;
+  tcpufiledefclass = class of tcpufiledef;
+
+  tcpuvariantdef = class(tvariantdef)
+  end;
+  tcpuvariantdefclass = class of tcpuvariantdef;
+
+  tcpuformaldef = class(tformaldef)
+  end;
+  tcpuformaldefclass = class of tcpuformaldef;
+
+  tcpuforwarddef = class(tforwarddef)
+  end;
+  tcpuforwarddefclass = class of tcpuforwarddef;
+
+  tcpuundefineddef = class(tundefineddef)
+  end;
+  tcpuundefineddefclass = class of tcpuundefineddef;
+
+  tcpuerrordef = class(terrordef)
+  end;
+  tcpuerrordefclass = class of tcpuerrordef;
+
+  tcpupointerdef = class(tpointerdef)
+  end;
+  tcpupointerdefclass = class of tcpupointerdef;
+
+  tcpurecorddef = class(trecorddef)
+  end;
+  tcpurecorddefclass = class of tcpurecorddef;
+
+  tcpuimplementedinterface = class(timplementedinterface)
+  end;
+  tcpuimplementedinterfaceclass = class of tcpuimplementedinterface;
+
+  tcpuobjectdef = class(tobjectdef)
+  end;
+  tcpuobjectdefclass = class of tcpuobjectdef;
+
+  tcpuclassrefdef = class(tclassrefdef)
+  end;
+  tcpuclassrefdefclass = class of tcpuclassrefdef;
+
+  tcpuarraydef = class(tarraydef)
+  end;
+  tcpuarraydefclass = class of tcpuarraydef;
+
+  tcpuorddef = class(torddef)
+  end;
+  tcpuorddefclass = class of tcpuorddef;
+
+  tcpufloatdef = class(tfloatdef)
+  end;
+  tcpufloatdefclass = class of tcpufloatdef;
+
+  tcpuprocvardef = class(tprocvardef)
+  end;
+  tcpuprocvardefclass = class of tcpuprocvardef;
+
+  tcpuprocdef = class(tprocdef)
+  end;
+  tcpuprocdefclass = class of tcpuprocdef;
+
+  tcpustringdef = class(tstringdef)
+  end;
+  tcpustringdefclass = class of tcpustringdef;
+
+  tcpuenumdef = class(tenumdef)
+  end;
+  tcpuenumdefclass = class of tcpuenumdef;
+
+  tcpusetdef = class(tsetdef)
+  end;
+  tcpusetdefclass = class of tcpusetdef;
+
+  { syms }
+  tcpulabelsym = class(tlabelsym)
+  end;
+  tcpulabelsymclass = class of tcpulabelsym;
+
+  tcpuunitsym = class(tunitsym)
+  end;
+  tcpuunitsymclass = class of tcpuunitsym;
+
+  tcpuprogramparasym = class(tprogramparasym)
+  end;
+  tcpuprogramparasymclass = class(tprogramparasym);
+
+  tcpunamespacesym = class(tnamespacesym)
+  end;
+  tcpunamespacesymclass = class of tcpunamespacesym;
+
+  tcpuprocsym = class(tprocsym)
+  end;
+  tcpuprocsymclass = class of tcpuprocsym;
+
+  tcputypesym = class(ttypesym)
+  end;
+  tcpuypesymclass = class of tcputypesym;
+
+  tcpufieldvarsym = class(tfieldvarsym)
+  end;
+  tcpufieldvarsymclass = class of tcpufieldvarsym;
+
+  tcpulocalvarsym = class(tlocalvarsym)
+  end;
+  tcpulocalvarsymclass = class of tcpulocalvarsym;
+
+  tcpuparavarsym = class(tparavarsym)
+  end;
+  tcpuparavarsymclass = class of tcpuparavarsym;
+
+  tcpustaticvarsym = class(tstaticvarsym)
+  end;
+  tcpustaticvarsymclass = class of tcpustaticvarsym;
+
+  tcpuabsolutevarsym = class(tabsolutevarsym)
+  end;
+  tcpuabsolutevarsymclass = class of tcpuabsolutevarsym;
+
+  tcpupropertysym = class(tpropertysym)
+  end;
+  tcpupropertysymclass = class of tcpupropertysym;
+
+  tcpuconstsym = class(tconstsym)
+  end;
+  tcpuconstsymclass = class of tcpuconstsym;
+
+  tcpuenumsym = class(tenumsym)
+  end;
+  tcpuenumsymclass = class of tcpuenumsym;
+
+  tcpusyssym = class(tsyssym)
+  end;
+  tcpusyssymclass = class of tcpusyssym;
+
+
+const
+   pbestrealtype : ^tdef = @s64floattype;
+
+
+implementation
+
+begin
+  { used tdef classes }
+  cfiledef:=tcpufiledef;
+  cvariantdef:=tcpuvariantdef;
+  cformaldef:=tcpuformaldef;
+  cforwarddef:=tcpuforwarddef;
+  cundefineddef:=tcpuundefineddef;
+  cerrordef:=tcpuerrordef;
+  cpointerdef:=tcpupointerdef;
+  crecorddef:=tcpurecorddef;
+  cimplementedinterface:=tcpuimplementedinterface;
+  cobjectdef:=tcpuobjectdef;
+  cclassrefdef:=tcpuclassrefdef;
+  carraydef:=tcpuarraydef;
+  corddef:=tcpuorddef;
+  cfloatdef:=tcpufloatdef;
+  cprocvardef:=tcpuprocvardef;
+  cprocdef:=tcpuprocdef;
+  cstringdef:=tcpustringdef;
+  cenumdef:=tcpuenumdef;
+  csetdef:=tcpusetdef;
+
+  { used tsym classes }
+  clabelsym:=tcpulabelsym;
+  cunitsym:=tcpuunitsym;
+  cprogramparasym:=tcpuprogramparasym;
+  cnamespacesym:=tcpunamespacesym;
+  cprocsym:=tcpuprocsym;
+  ctypesym:=tcputypesym;
+  cfieldvarsym:=tcpufieldvarsym;
+  clocalvarsym:=tcpulocalvarsym;
+  cparavarsym:=tcpuparavarsym;
+  cstaticvarsym:=tcpustaticvarsym;
+  cabsolutevarsym:=tcpuabsolutevarsym;
+  cpropertysym:=tcpupropertysym;
+  cconstsym:=tcpuconstsym;
+  cenumsym:=tcpuenumsym;
+  csyssym:=tcpusyssym;
+end.
+

+ 23 - 0
compiler/xtensa/xtensaatt.inc

@@ -0,0 +1,23 @@
+(
+'none',
+'bcc',
+'bt',
+'call0',
+'call4',
+'call8',
+'call12',
+'callx0',
+'callx4',
+'callx8',
+'callx12',
+'l32i',
+'lsi',
+'j',
+'mov',
+'mov.s',
+'nop',
+'s32i',
+'ssi'
+);
+
+

+ 23 - 0
compiler/xtensa/xtensaop.inc

@@ -0,0 +1,23 @@
+(
+A_NONE,
+A_Bcc,
+A_BT,
+A_CALL0,
+A_CALL4,
+A_CALL8,
+A_CALL12,
+A_CALLX0,
+A_CALLX4,
+A_CALLX8,
+A_CALLX12,
+A_L32I,
+A_LSI,
+A_J,
+A_MOV,
+A_MOV_S,
+A_NOP,
+A_S32I,
+A_SSI
+);
+
+