Browse Source

* synchronize with trunk

git-svn-id: branches/z80@44397 -
nickysn 5 years ago
parent
commit
755fe97c51
100 changed files with 10277 additions and 3187 deletions
  1. 614 138
      .gitattributes
  2. 21 21
      .gitignore
  3. 181 202
      Makefile
  4. 30 46
      Makefile.fpc
  5. 320 152
      compiler/Makefile
  6. 307 70
      compiler/Makefile.fpc
  7. 2 1
      compiler/aarch64/a64att.inc
  8. 1 0
      compiler/aarch64/a64atts.inc
  9. 2 0
      compiler/aarch64/a64ins.dat
  10. 2 1
      compiler/aarch64/a64op.inc
  11. 67 2
      compiler/aarch64/a64reg.dat
  12. 106 10
      compiler/aarch64/aasmcpu.pas
  13. 15 4
      compiler/aarch64/agcpugas.pas
  14. 513 12
      compiler/aarch64/aoptcpu.pas
  15. 2 4
      compiler/aarch64/aoptcpub.pas
  16. 163 53
      compiler/aarch64/cgcpu.pas
  17. 76 24
      compiler/aarch64/cpubase.pas
  18. 8 4
      compiler/aarch64/cpuinfo.pas
  19. 7 4
      compiler/aarch64/cpunode.pas
  20. 125 85
      compiler/aarch64/cpupara.pas
  21. 3 0
      compiler/aarch64/cputarg.pas
  22. 11 7
      compiler/aarch64/hlcgcpu.pas
  23. 9 0
      compiler/aarch64/ncpuadd.pas
  24. 0 1
      compiler/aarch64/ncpucnv.pas
  25. 90 0
      compiler/aarch64/ncpucon.pas
  26. 94 1
      compiler/aarch64/ncpuinl.pas
  27. 75 9
      compiler/aarch64/ncpumat.pas
  28. 1 1
      compiler/aarch64/ncpumem.pas
  29. 121 6
      compiler/aarch64/ncpuset.pas
  30. 64 0
      compiler/aarch64/ra64con.inc
  31. 64 0
      compiler/aarch64/ra64dwa.inc
  32. 1 1
      compiler/aarch64/ra64nor.inc
  33. 64 0
      compiler/aarch64/ra64num.inc
  34. 201 137
      compiler/aarch64/ra64rni.inc
  35. 200 136
      compiler/aarch64/ra64sri.inc
  36. 64 0
      compiler/aarch64/ra64sta.inc
  37. 64 0
      compiler/aarch64/ra64std.inc
  38. 64 0
      compiler/aarch64/ra64sup.inc
  39. 3 0
      compiler/aarch64/racpu.pas
  40. 15 12
      compiler/aarch64/racpugas.pas
  41. 5 1
      compiler/aarch64/rgcpu.pas
  42. 16 113
      compiler/aasmbase.pas
  43. 198 0
      compiler/aasmcfi.pas
  44. 199 40
      compiler/aasmcnst.pas
  45. 112 4
      compiler/aasmdata.pas
  46. 1 1
      compiler/aasmdef.pas
  47. 4 0
      compiler/aasmsym.pas
  48. 375 62
      compiler/aasmtai.pas
  49. 322 89
      compiler/aggas.pas
  50. 22 5
      compiler/aopt.pas
  51. 49 25
      compiler/aoptbase.pas
  52. 2 2
      compiler/aoptda.pas
  53. 1198 140
      compiler/aoptobj.pas
  54. 52 6
      compiler/aoptutils.pas
  55. 248 89
      compiler/arm/aasmcpu.pas
  56. 53 15
      compiler/arm/agarmgas.pas
  57. 233 41
      compiler/arm/aoptcpu.pas
  58. 19 6
      compiler/arm/aoptcpub.pas
  59. 1 0
      compiler/arm/armatt.inc
  60. 1 0
      compiler/arm/armatts.inc
  61. 15 1
      compiler/arm/armins.dat
  62. 1 1
      compiler/arm/armnop.inc
  63. 1 0
      compiler/arm/armop.inc
  64. 70 0
      compiler/arm/armtab.inc
  65. 261 136
      compiler/arm/cgcpu.pas
  66. 76 20
      compiler/arm/cpubase.pas
  67. 23 5
      compiler/arm/cpuelf.pas
  68. 84 9
      compiler/arm/cpuinfo.pas
  69. 9 3
      compiler/arm/cpunode.pas
  70. 214 63
      compiler/arm/cpupara.pas
  71. 16 7
      compiler/arm/cpupi.pas
  72. 6 7
      compiler/arm/hlcgcpu.pas
  73. 24 22
      compiler/arm/narmadd.pas
  74. 1 1
      compiler/arm/narmcal.pas
  75. 10 14
      compiler/arm/narmcnv.pas
  76. 93 64
      compiler/arm/narmcon.pas
  77. 51 60
      compiler/arm/narminl.pas
  78. 172 0
      compiler/arm/narmld.pas
  79. 40 30
      compiler/arm/narmmat.pas
  80. 40 36
      compiler/arm/narmset.pas
  81. 327 0
      compiler/arm/narmutil.pas
  82. 17 12
      compiler/arm/raarmgas.pas
  83. 14 6
      compiler/arm/rgcpu.pas
  84. 2 2
      compiler/arm/symcpu.pas
  85. 121 0
      compiler/armgen/armpara.pas
  86. 279 18
      compiler/assemble.pas
  87. 115 24
      compiler/avr/aasmcpu.pas
  88. 25 16
      compiler/avr/agavrgas.pas
  89. 281 152
      compiler/avr/aoptcpu.pas
  90. 0 4
      compiler/avr/aoptcpub.pas
  91. 39 35
      compiler/avr/avrreg.dat
  92. 22 0
      compiler/avr/ccpuinnr.inc
  93. 374 281
      compiler/avr/cgcpu.pas
  94. 48 53
      compiler/avr/cpubase.pas
  95. 445 287
      compiler/avr/cpuinfo.pas
  96. 1 0
      compiler/avr/cpunode.pas
  97. 38 21
      compiler/avr/cpupara.pas
  98. 3 2
      compiler/avr/cpupi.pas
  99. 2 3
      compiler/avr/hlcgcpu.pas
  100. 37 9
      compiler/avr/navradd.pas

File diff suppressed because it is too large
+ 614 - 138
.gitattributes


+ 21 - 21
.gitignore

@@ -238,27 +238,6 @@ compiler/x86_64/lazbuild/fpcmade.*
 compiler/x86_64/lazbuild/units
 compiler/x86_64/lazbuild/units
 compiler/x86_64/units
 compiler/x86_64/units
 /fpcmade.*
 /fpcmade.*
-ide/*.bak
-ide/*.exe
-ide/*.o
-ide/*.ppu
-ide/*.s
-ide/compiler/*.bak
-ide/compiler/*.exe
-ide/compiler/*.o
-ide/compiler/*.ppu
-ide/compiler/*.s
-ide/compiler/fpcmade.*
-ide/compiler/units
-ide/fakegdb/*.bak
-ide/fakegdb/*.exe
-ide/fakegdb/*.o
-ide/fakegdb/*.ppu
-ide/fakegdb/*.s
-ide/fakegdb/fpcmade.*
-ide/fakegdb/units
-ide/fpcmade.*
-ide/units
 installer/*.bak
 installer/*.bak
 installer/*.exe
 installer/*.exe
 installer/*.o
 installer/*.o
@@ -2689,6 +2668,27 @@ packages/iconvenc/src/build-stamp.*
 packages/iconvenc/src/fpcmade.*
 packages/iconvenc/src/fpcmade.*
 packages/iconvenc/src/units
 packages/iconvenc/src/units
 packages/iconvenc/units
 packages/iconvenc/units
+packages/ide/*.bak
+packages/ide/*.exe
+packages/ide/*.o
+packages/ide/*.ppu
+packages/ide/*.s
+packages/ide/compiler/*.bak
+packages/ide/compiler/*.exe
+packages/ide/compiler/*.o
+packages/ide/compiler/*.ppu
+packages/ide/compiler/*.s
+packages/ide/compiler/fpcmade.*
+packages/ide/compiler/units
+packages/ide/fakegdb/*.bak
+packages/ide/fakegdb/*.exe
+packages/ide/fakegdb/*.o
+packages/ide/fakegdb/*.ppu
+packages/ide/fakegdb/*.s
+packages/ide/fakegdb/fpcmade.*
+packages/ide/fakegdb/units
+packages/ide/fpcmade.*
+packages/ide/units
 packages/imagemagick/*.bak
 packages/imagemagick/*.bak
 packages/imagemagick/*.exe
 packages/imagemagick/*.exe
 packages/imagemagick/*.o
 packages/imagemagick/*.o

File diff suppressed because it is too large
+ 181 - 202
Makefile


+ 30 - 46
Makefile.fpc

@@ -4,10 +4,10 @@
 
 
 [package]
 [package]
 name=fpc
 name=fpc
-version=3.1.1
+version=3.3.1
 
 
 [target]
 [target]
-dirs=compiler rtl utils packages ide installer
+dirs=compiler rtl utils packages installer
 
 
 [require]
 [require]
 nortl=y
 nortl=y
@@ -20,7 +20,7 @@ fpcdir=.
 rule=help
 rule=help
 
 
 [prerules]
 [prerules]
-REQUIREDVERSION=3.0.0
+REQUIREDVERSION=3.0.4
 REQUIREDVERSION2=3.0.2
 REQUIREDVERSION2=3.0.2
 
 
 
 
@@ -49,6 +49,9 @@ endif
 ifeq ($(CPU_TARGET),sparc)
 ifeq ($(CPU_TARGET),sparc)
 PPSUF=sparc
 PPSUF=sparc
 endif
 endif
+ifeq ($(CPU_TARGET),sparc64)
+PPSUF=sparc64
+endif
 ifeq ($(CPU_TARGET),powerpc)
 ifeq ($(CPU_TARGET),powerpc)
 PPSUF=ppc
 PPSUF=ppc
 endif
 endif
@@ -82,6 +85,15 @@ endif
 ifeq ($(CPU_TARGET),aarch64)
 ifeq ($(CPU_TARGET),aarch64)
 PPSUF=a64
 PPSUF=a64
 endif
 endif
+ifeq ($(CPU_TARGET),riscv32)
+PPSUF=rv32
+endif
+ifeq ($(CPU_TARGET),riscv64)
+PPSUF=rv64
+endif
+ifeq ($(CPU_TARGET),xtensa)
+PPSUF=xtensa
+endif
 
 
 # cross compilers use the full cpu_target, not just the ppc-suffix
 # cross compilers use the full cpu_target, not just the ppc-suffix
 # (except if the target cannot run a native compiler)
 # (except if the target cannot run a native compiler)
@@ -188,42 +200,24 @@ endif
 ifneq ($(OPT),)
 ifneq ($(OPT),)
 OPTNEW+=$(OPT)
 OPTNEW+=$(OPT)
 endif
 endif
-CLEANOPTS=FPC=$(PPNEW)
-BUILDOPTS=FPC=$(PPNEW) FPCFPMAKE=$(FPCFPMAKENEW) RELEASE=1 'OPT=$(OPTNEW)'
-INSTALLOPTS=FPC=$(PPNEW) ZIPDESTDIR=$(BASEDIR) FPCMAKE=$(FPCMAKENEW)
 
 
-# This list should be the same as in fpcbuild/Makefile.fpc and in ide/Makefile.fpc
-GDBMI_DEFAULT_OS_LIST=aix freebsd haiku linux netbsd openbsd solaris win32 win64
-#  Determine if we should use GDBMI for Text Mode IDE compilation
-ifndef NOGDBMI
-ifneq ($(findstring $(OS_TARGET),$(GDBMI_DEFAULT_OS_LIST)),)
-export GDBMI=1
+# some targets do not generate PIC by default so we have to select explicitly
+# the general threading model when compiling the final versions of rtl and packages
+ifneq ($(findstring $(OS_TARGET),linux),)
+ifneq ($(findstring $(CPU_TARGET),i386 arm),)
+override OPTNEW+=-CVglobal-dynamic
 endif
 endif
 endif
 endif
 
 
-# Compile also IDE now enabled even for cross-compilation
-# if GDBMI is set
-ifdef GDBMI
-ifneq ($(GDBMI),0)
-IDE=1
-endif
-else # not GDBMI
-# Skipped by default for cross compiles, because it depends on libc
-ifndef CROSSCOMPILE
-ifneq ($(wildcard ide),)
-IDETARGETS=go32v2 win32 win64 linux freebsd os2 emx beos haiku
-ifneq ($(findstring $(OS_TARGET),$(IDETARGETS)),)
-IDE=1
-endif
-endif
-endif
-endif # not GDBMI
+CLEANOPTS=FPC=$(PPNEW)
+BUILDOPTS=FPC=$(PPNEW) FPCFPMAKE=$(FPCFPMAKENEW) RELEASE=1 'OPT=$(OPTNEW)' 'FPCMAKEOPT=$(OPT)'
+INSTALLOPTS=FPC=$(PPNEW) ZIPDESTDIR=$(BASEDIR) FPCMAKE=$(FPCMAKENEW)
 
 
 # CPU targets for which we only build the compiler/rtl
 # CPU targets for which we only build the compiler/rtl
 BuildOnlyBaseCPUs=jvm
 BuildOnlyBaseCPUs=jvm
 
 
 ifneq ($(wildcard utils),)
 ifneq ($(wildcard utils),)
-NOUTILSTARGETS=embedded gba nds msdos win16 $(BuildOnlyBaseCPUs)
+NOUTILSTARGETS=embedded gba nds msdos win16 macos $(BuildOnlyBaseCPUs)
 ifeq ($(findstring $(OS_TARGET),$(NOUTILSTARGETS)),)
 ifeq ($(findstring $(OS_TARGET),$(NOUTILSTARGETS)),)
 ifdef BUILDFULLNATIVE
 ifdef BUILDFULLNATIVE
 UTILS=1
 UTILS=1
@@ -325,19 +319,11 @@ endif
 ifdef UTILS
 ifdef UTILS
         $(MAKE) utils_clean $(CLEANOPTS)
         $(MAKE) utils_clean $(CLEANOPTS)
 endif
 endif
-ifdef IDE
-        $(MAKE) ide_clean $(CLEANOPTS)
-        $(MAKE) installer_clean $(CLEANOPTS)
-endif
 # build everything
 # build everything
         $(MAKE) rtl_$(ALLTARGET) $(BUILDOPTS)
         $(MAKE) rtl_$(ALLTARGET) $(BUILDOPTS)
         $(MAKE) packages_$(ALLTARGET) $(BUILDOPTS)
         $(MAKE) packages_$(ALLTARGET) $(BUILDOPTS)
 ifdef UTILS
 ifdef UTILS
         $(MAKE) utils_all $(BUILDOPTS)
         $(MAKE) utils_all $(BUILDOPTS)
-endif
-ifdef IDE
-        $(MAKE) ide_all $(BUILDOPTS)
-        $(MAKE) installer_all $(BUILDOPTS)
 endif
 endif
         $(ECHOREDIR) Build > $(BUILDSTAMP)
         $(ECHOREDIR) Build > $(BUILDSTAMP)
         $(ECHOREDIR) Build > base.$(BUILDSTAMP)
         $(ECHOREDIR) Build > base.$(BUILDSTAMP)
@@ -370,9 +356,6 @@ installother:
 ifdef UTILS
 ifdef UTILS
         $(MAKE) utils_$(INSTALLTARGET) $(INSTALLOPTS)
         $(MAKE) utils_$(INSTALLTARGET) $(INSTALLOPTS)
 endif
 endif
-ifdef IDE
-        $(MAKE) ide_$(INSTALLTARGET) $(BUILDOPTS)
-endif
 
 
 zipinstallbase:
 zipinstallbase:
         $(MAKE) fpc_zipinstall ZIPTARGET=installbase ZIPNAME=base $(INSTALLOPTS)
         $(MAKE) fpc_zipinstall ZIPTARGET=installbase ZIPNAME=base $(INSTALLOPTS)
@@ -382,9 +365,6 @@ zipinstallother:
 ifdef UTILS
 ifdef UTILS
         $(MAKE) utils_zip$(INSTALLTARGET) $(INSTALLOPTS)
         $(MAKE) utils_zip$(INSTALLTARGET) $(INSTALLOPTS)
 endif
 endif
-ifdef IDE
-        $(MAKE) ide_zip$(INSTALLTARGET) $(INSTALLOPTS)
-endif
 
 
 
 
 installall: $(BUILDSTAMP)
 installall: $(BUILDSTAMP)
@@ -395,7 +375,11 @@ endif
 
 
 singlezipinstall: zipinstallall
 singlezipinstall: zipinstallall
 zipinstallall: $(BUILDSTAMP)
 zipinstallall: $(BUILDSTAMP)
-        $(MAKE) fpc_zipinstall ZIPTARGET=install FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(TARGETSUFFIX) $(INSTALLOPTS)
+ifeq ($(FULL_SOURCE),$(FULL_TARGET))
+	$(MAKE) fpc_zipinstall ZIPTARGET=install FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(TARGETSUFFIX) $(INSTALLOPTS)
+else
+	$(MAKE) fpc_zipinstall ZIPTARGET=install FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(TARGETSUFFIX).built.on.$(SOURCESUFFIX) $(INSTALLOPTS)
+endif
 
 
 
 
 ##########################################################################
 ##########################################################################
@@ -414,4 +398,4 @@ crosszipinstall:
         $(MAKE) zipinstall CROSSINSTALL=1
         $(MAKE) zipinstall CROSSINSTALL=1
 
 
 crosssinglezipinstall:
 crosssinglezipinstall:
-        $(MAKE) fpc_zipinstall ZIPTARGET=crossinstall ZIPNAME=fpc $(INSTALLOPTS)
+	$(MAKE) fpc_zipinstall ZIPTARGET=crossinstall FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(SOURCESUFFIX).cross.$(TARGETSUFFIX) $(INSTALLOPTS)

File diff suppressed because it is too large
+ 320 - 152
compiler/Makefile


+ 307 - 70
compiler/Makefile.fpc

@@ -4,14 +4,14 @@
 
 
 [package]
 [package]
 name=compiler
 name=compiler
-version=3.1.1
+version=3.3.1
 
 
 [target]
 [target]
 programs=pp
 programs=pp
 dirs=utils
 dirs=utils
 
 
 [compiler]
 [compiler]
-targetdir=.
+targetdir=$(CPU_UNITDIR)/bin/$(FULL_TARGET)
 unittargetdir=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 unittargetdir=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 unitdir=$(COMPILERSOURCEDIR)
 unitdir=$(COMPILERSOURCEDIR)
 includedir=$(CPC_TARGET)
 includedir=$(CPC_TARGET)
@@ -32,7 +32,7 @@ fpcdir=..
 unexport FPC_VERSION FPC_COMPILERINFO
 unexport FPC_VERSION FPC_COMPILERINFO
 
 
 # Which platforms are ready for inclusion in the cycle
 # Which platforms are ready for inclusion in the cycle
-CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086 aarch64
+CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086 aarch64 sparc64 riscv32 riscv64 xtensa
 
 
 # All supported targets used for clean
 # All supported targets used for clean
 ALLTARGETS=$(CYCLETARGETS)
 ALLTARGETS=$(CYCLETARGETS)
@@ -47,6 +47,9 @@ endif
 ifdef SPARC
 ifdef SPARC
 PPC_TARGET=sparc
 PPC_TARGET=sparc
 endif
 endif
+ifdef SPARC64
+PPC_TARGET=sparc64
+endif
 ifdef M68K
 ifdef M68K
 PPC_TARGET=m68k
 PPC_TARGET=m68k
 endif
 endif
@@ -80,6 +83,15 @@ endif
 ifdef AARCH64
 ifdef AARCH64
 PPC_TARGET=aarch64
 PPC_TARGET=aarch64
 endif
 endif
+ifdef RISCV32
+PPC_TARGET=riscv32
+endif
+ifdef RISCV64
+PPC_TARGET=riscv64
+endif
+ifdef XTENSA
+PPC_TARGET=xtensa
+endif
 
 
 # Default is to generate a compiler for the same
 # Default is to generate a compiler for the same
 # platform as CPU_TARGET (a native compiler)
 # platform as CPU_TARGET (a native compiler)
@@ -171,7 +183,6 @@ CPUSUF=386
 endif
 endif
 ifeq ($(CPC_TARGET),m68k)
 ifeq ($(CPC_TARGET),m68k)
 CPUSUF=68k
 CPUSUF=68k
-ALLOW_WARNINGS=1
 endif
 endif
 ifeq ($(CPC_TARGET),powerpc)
 ifeq ($(CPC_TARGET),powerpc)
 CPUSUF=ppc
 CPUSUF=ppc
@@ -182,6 +193,9 @@ endif
 ifeq ($(CPC_TARGET),sparc)
 ifeq ($(CPC_TARGET),sparc)
 CPUSUF=sparc
 CPUSUF=sparc
 endif
 endif
+ifeq ($(CPC_TARGET),sparc64)
+CPUSUF=sparc64
+endif
 ifeq ($(CPC_TARGET),x86_64)
 ifeq ($(CPC_TARGET),x86_64)
 CPUSUF=x64
 CPUSUF=x64
 endif
 endif
@@ -207,6 +221,15 @@ endif
 ifeq ($(CPC_TARGET),aarch64)
 ifeq ($(CPC_TARGET),aarch64)
 CPUSUF=a64
 CPUSUF=a64
 endif
 endif
+ifeq ($(CPC_TARGET),riscv32)
+CPUSUF=rv32
+endif
+ifeq ($(CPC_TARGET),riscv64)
+CPUSUF=rv64
+endif
+ifeq ($(CPC_TARGET),xtensa)
+CPUSUF=xtensa
+endif
 
 
 # Do not define the default -d$(CPU_TARGET) because that
 # Do not define the default -d$(CPU_TARGET) because that
 # will conflict with our -d$(CPC_TARGET)
 # will conflict with our -d$(CPC_TARGET)
@@ -217,6 +240,15 @@ MSGFILE=msg/error$(FPCLANG).msg
 
 
 
 
 SVNVERSION:=$(firstword $(wildcard $(addsuffix /svnversion$(SRCEXEEXT),$(SEARCHPATH))))
 SVNVERSION:=$(firstword $(wildcard $(addsuffix /svnversion$(SRCEXEEXT),$(SEARCHPATH))))
+PPUDUMPPROG:=$(firstword $(strip $(wildcard $(addsuffix /ppudump$(SRCEXEEXT),$(SEARCHPATH)))))
+ifndef PPUDUMP
+ifdef PPUDUMPPROG
+PPUDUMP=$(PPUDUMPPROG)
+else
+PPUDUMP=ppudump
+endif
+endif
+
 # Check if revision.inc is present
 # Check if revision.inc is present
 REVINC:=$(wildcard revision.inc)
 REVINC:=$(wildcard revision.inc)
 ifneq ($(REVINC),)
 ifneq ($(REVINC),)
@@ -243,12 +275,12 @@ override LOCALOPT+=-d$(CPC_TARGET) -dGDB -dBROWSERLOG
 
 
 #include LLVM define/directory if requested
 #include LLVM define/directory if requested
 ifdef LLVM
 ifdef LLVM
-ifeq ($(findstring $(PPC_TARGET),x86_64),)
-$(error The $(PPC_TARGET) architecture is not (yet) support by the FPC/LLVM code generator)
+ifeq ($(findstring $(PPC_TARGET),x86_64 aarch64 arm),)
+$(error The $(PPC_TARGET) architecture is not (yet) supported by the FPC/LLVM code generator)
 endif
 endif
 
 
 ifeq ($(findstring $(OS_TARGET),darwin iphonesim linux),)
 ifeq ($(findstring $(OS_TARGET),darwin iphonesim linux),)
-$(error The $(PPC_TARGET) target OS is not (yet) support by the FPC/LLVM code generator)
+$(error The $(PPC_TARGET) target OS is not (yet) supported by the FPC/LLVM code generator)
 endif
 endif
 
 
 override LOCALOPT+=-dllvm -Fullvm
 override LOCALOPT+=-dllvm -Fullvm
@@ -256,12 +288,12 @@ endif
 
 
 # i386 specific
 # i386 specific
 ifeq ($(PPC_TARGET),i386)
 ifeq ($(PPC_TARGET),i386)
-override LOCALOPT+=-Fux86
+override LOCALOPT+=-Fux86 -Fix86
 endif
 endif
 
 
 # x86_64 specific
 # x86_64 specific
 ifeq ($(PPC_TARGET),x86_64)
 ifeq ($(PPC_TARGET),x86_64)
-override LOCALOPT+=-Fux86
+override LOCALOPT+=-Fux86 -Fix86
 endif
 endif
 
 
 # PowerPC specific
 # PowerPC specific
@@ -281,12 +313,22 @@ endif
 
 
 # Sparc specific
 # Sparc specific
 ifeq ($(PPC_TARGET),sparc)
 ifeq ($(PPC_TARGET),sparc)
-override LOCALOPT+=
+override LOCALOPT+=-Fusparcgen -Fisparcgen
+endif
+
+# Sparc64 specific
+ifeq ($(PPC_TARGET),sparc64)
+override LOCALOPT+=-Fusparcgen -Fisparcgen
 endif
 endif
 
 
 # ARM specific
 # ARM specific
 ifeq ($(PPC_TARGET),arm)
 ifeq ($(PPC_TARGET),arm)
-override LOCALOPT+=
+override LOCALOPT+=-Fuarmgen
+endif
+
+# ARMEB specific
+ifeq ($(PPC_TARGET),armeb)
+override LOCALOPT+=-Fuarmgen
 endif
 endif
 
 
 # mipsel specific
 # mipsel specific
@@ -299,11 +341,26 @@ ifeq ($(PPC_TARGET),jvm)
 override LOCALOPT+=-Fujvm
 override LOCALOPT+=-Fujvm
 endif
 endif
 
 
+# AArch64 specific
+ifeq ($(PPC_TARGET),aarch64)
+override LOCALOPT+=-Fuarmgen
+endif
+
 # i8086 specific
 # i8086 specific
 ifeq ($(PPC_TARGET),i8086)
 ifeq ($(PPC_TARGET),i8086)
 override LOCALOPT+=-Fux86
 override LOCALOPT+=-Fux86
 endif
 endif
 
 
+# RiscV32 specific
+ifeq ($(PPC_TARGET),riscv32)
+override LOCALOPT+=-Furiscv
+endif
+
+# RiscV64 specific
+ifeq ($(PPC_TARGET),riscv64)
+override LOCALOPT+=-Furiscv
+endif
+
 OPTWPOCOLLECT=-OWdevirtcalls,optvmts -FW$(BASEDIR)/pp1.wpo
 OPTWPOCOLLECT=-OWdevirtcalls,optvmts -FW$(BASEDIR)/pp1.wpo
 OPTWPOPERFORM=-Owdevirtcalls,optvmts -Fw$(BASEDIR)/pp1.wpo
 OPTWPOPERFORM=-Owdevirtcalls,optvmts -Fw$(BASEDIR)/pp1.wpo
 # symbol liveness WPO requires nm, smart linking and no stripping (the latter
 # symbol liveness WPO requires nm, smart linking and no stripping (the latter
@@ -337,6 +394,16 @@ endif
 ifeq ($(OS_TARGET),win16)
 ifeq ($(OS_TARGET),win16)
 NoNativeBinaries=1
 NoNativeBinaries=1
 endif
 endif
+ifeq ($(OS_TARGET),macos)
+NoNativeBinaries=1
+endif
+
+# Allow install for jvm
+ifeq ($(NoNativeBinaries),1)
+override EXEEXT=$(SRCEXEEXT)
+# In those cases, installation is a cross-installation
+CROSSINSTALL=1
+endif
 
 
 [rules]
 [rules]
 #####################################################################
 #####################################################################
@@ -355,7 +422,7 @@ endif
 
 
 # Use -Sew option by default
 # Use -Sew option by default
 # Allow disabling by setting ALLOW_WARNINGS=1
 # Allow disabling by setting ALLOW_WARNINGS=1
-ifeq ($(findstring 2.4.,$(FPC_VERSION)),)
+ifneq ($(CYCLELEVEL),1)
 ifndef ALLOW_WARNINGS
 ifndef ALLOW_WARNINGS
 override LOCALOPT+=-Sew
 override LOCALOPT+=-Sew
 endif
 endif
@@ -408,23 +475,139 @@ INSTALLEXEFILE=$(EXENAME)
 endif
 endif
 
 
 #####################################################################
 #####################################################################
-# CPU targets
+# Rules to run the compiler through GDB using utils/gppc386.pp code
+# inside specific levels of cycle.
+# Simply compile utils and utils/gppc386
+# And move generated utils/gppc386 to ./g$(TEMPNAME)
 #####################################################################
 #####################################################################
 
 
-PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 mips mipsel avr jvm i8086 aarch64
+# Use debugger for all compilations
+ifdef DEBUG_CYCLE
+DEBUG_EXENAME=1
+DEBUG_PPEXENAME=1
+DEBUG_TEMPNAME=1
+DEBUG_PPCROSSNAME=1
+DEBUG_TEMPNAME1=1
+DEBUG_TEMPNAME2=1
+DEBUG_TEMPNAME3=1
+DEBUG_TEMPWPONAME1=1
+DEBUG_TEMPWPONAME2=1
+endif
+
+# Or DEBUG_XXX to only start a specific compiler
+# inside GDB
+ifdef DEBUG_EXENAME
+EXENAMEPREFIX=g
+NEED_G_COMPILERS+=g$(EXENAME)
+endif
+
+ifdef DEBUG_PPEXENAME
+PPEXENAMEPREFIX=g
+NEED_G_COMPILERS+=g$(PPEXENAME)
+endif
+
+ifdef DEBUG_TEMPNAME
+TEMPNAMEPREFIX=g
+NEED_G_COMPILERS+=g$(TEMPNAME)
+endif
+
+ifdef DEBUG_PPCROSSNAME
+PPCROSSNAMEPREFIX=g
+NEED_G_COMPILERS+=g$(PPCROSSNAME)
+endif
+
+ifdef DEBUG_TEMPNAME1
+TEMPNAME1PREFIX=g
+NEED_G_COMPILERS+=g$(TEMPNAME1)
+endif
+
+ifdef DEBUG_TEMPNAME2
+TEMPNAME2PREFIX=g
+NEED_G_COMPILERS+=g$(TEMPNAME2)
+endif
+
+ifdef DEBUG_TEMPNAME3
+TEMPNAME3PREFIX=g
+NEED_G_COMPILERS+=g$(TEMPNAME3)
+endif
+
+ifdef DEBUG_TEMPWPONAME1
+TEMPNAMEWPO1PREFIX=g
+NEED_G_COMPILERS+=g$(TEMPWPONAME1)
+endif
+
+ifdef DEBUG_TEMPWPONAME2
+TEMPWPONAME2PREFIX=g
+NEED_G_COMPILERS+=g$(TEMPWPONAME2)
+endif
+
+ALL_G_COMPILERS="g$(EXENAME) g$(PPEXENAME) g$(TEMPNAME) g$(PPCROSSNAME) g$(TEMPNAME1) g$(TEMPNAME2) g$(TEMPNAME3) g$(TEMPWPONAME1) g$(TEMPWPONAME2)"
+
+#####################################################################
+# To start a given compiler $(PP) with gdb, copy utils/gppc386 as g$(PP).
+# Symbolic link is not working, full copy required.
+# Use a file as time stamp to avoid recompiling utils/gppc386
+# unless needed.
+#####################################################################
+g$(COMPILERTEMPNAME): fpcmade.generate_g_compilers
+	$(COPY) ./utils/gppc386 ./g$(COMPILERTEMPNAME)
+
+fpcmade.generate_g_compilers: utils/gppc386.pp
+	$(MAKE) rtlclean rtl utils
+	$(MAKE) -C utils gppc386$(EXEEXT)
+	$(GECHO) -n "utils/gppc386 generated at " > $@
+	$(GDATE) >> $@
+
+ifdef EXENAMEPREFIX
+	$(MAKE) g$(EXENAME) COMPILERTEMPNAME=$(EXENAME)
+endif
+ifdef PPEXENAMEPREFIX
+	$(MAKE) g$(PPEXENAME) COMPILERTEMPNAME=$(PPEXENAME)
+endif
+ifdef TEMPNAMEPREFIX
+	$(MAKE) g$(TEMPNAME) COMPILERTEMPNAME=$(TEMPNAME)
+endif
+ifdef PPCROSSNAMEPREFIX
+	$(MAKE) g$(PPCROSSNAME) COMPILERTEMPNAME=$(PPCROSSNAME)
+endif
+ifdef TEMPNAME1PREFIX
+	$(MAKE) g$(TEMPNAME1) COMPILERTEMPNAME=$(TEMPNAME1)
+endif
+ifdef TEMPNAME2PREFIX
+	$(MAKE) g$(TEMPNAME2) COMPILERTEMPNAME=$(TEMPNAME2)
+endif
+ifdef TEMPNAME3PREFIX
+	$(MAKE) g$(TEMPNAME3) COMPILERTEMPNAME=$(TEMPNAME3)
+endif
+ifdef TEMPWPONAME1PREFIX
+	$(MAKE) g$(TEMPWPONAME1) COMPILERTEMPNAME=$(TEMPWPONAME1)
+endif
+ifdef TEMPWPONAME2PREFIX
+	$(MAKE) g$(TEMPWPONAME2) COMPILERTEMPNAME=$(TEMPWPONAME2)
+endif
+
+
+#####################################################################
+# cpu targets
+#####################################################################
+
+PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 mips mipsel avr jvm i8086 aarch64 sparc64 riscv32 riscv64 xtensa
+PPC_SUFFIXES=386 68k ppc sparc arm armeb x64 ppc64 mips mipsel avr jvm 8086 a64 sparc64 rv32 rv64 xtensa
 INSTALL_TARGETS=$(addsuffix _exe_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 INSTALL_TARGETS=$(addsuffix _exe_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 SYMLINKINSTALL_TARGETS=$(addsuffix _symlink_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 SYMLINKINSTALL_TARGETS=$(addsuffix _symlink_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 
 
 .PHONY: $(PPC_TARGETS) $(INSTALL_TARGETS)$(SYMLINKINSTALL_TARGETS)
 .PHONY: $(PPC_TARGETS) $(INSTALL_TARGETS)$(SYMLINKINSTALL_TARGETS)
 
 
 $(PPC_TARGETS):
 $(PPC_TARGETS):
-        $(MAKE) PPC_TARGET=$@ CPU_UNITDIR=$@ all
+        $(MAKE) PPC_TARGET=$@ CPU_UNITDIR=$@ compiler
 
 
 $(INSTALL_TARGETS):
 $(INSTALL_TARGETS):
-        $(MAKE) all install PPC_TARGET=$(subst _exe_install,,$@) CPU_UNITDIR=$(subst _exe_install,,$@)
+        $(MAKE) PPC_TARGET=$(subst _exe_install,,$@) CPU_UNITDIR=$(subst _exe_install,,$@) compiler
+		$(MAKE) PPC_TARGET=$(subst _exe_install,,$@) CPU_UNITDIR=$(subst _exe_install,,$@) exeinstall
 
 
 $(SYMLINKINSTALL_TARGETS):
 $(SYMLINKINSTALL_TARGETS):
-        $(MAKE) all installsymlink PPC_TARGET=$(subst _symlink_install,,$@) CPU_UNITDIR=$(subst _symlink_install,,$@)
+        $(MAKE) PPC_TARGET=$(subst _symlink_install,,$@) CPU_UNITDIR=$(subst _symlink_install,,$@) compiler
+		$(MAKE) PPC_TARGET=$(subst _symlink_install,,$@) CPU_UNITDIR=$(subst _symlink_install,,$@) installsymlink
 
 
 alltargets: $(ALLTARGETS)
 alltargets: $(ALLTARGETS)
 
 
@@ -433,8 +616,6 @@ alltargets: $(ALLTARGETS)
 # Default makefile
 # Default makefile
 #####################################################################
 #####################################################################
 
 
-.NOTPARALLEL:
-
 .PHONY: all compiler echotime ppuclean execlean clean distclean
 .PHONY: all compiler echotime ppuclean execlean clean distclean
 
 
 all: compiler $(addsuffix _all,$(TARGET_DIRS))
 all: compiler $(addsuffix _all,$(TARGET_DIRS))
@@ -460,25 +641,26 @@ ENDTIME:=unknown
 endif
 endif
 
 
 echotime:
 echotime:
-        @echo Start $(STARTTIME) now $(ENDTIME)
+	@echo Start $(STARTTIME) now $(ENDTIME)
 
 
 ppuclean:
 ppuclean:
-        -$(DEL) *$(OEXT) *$(PPUEXT) *$(RSTEXT) *$(ASMEXT) *$(STATICLIBEXT) *$(SHAREDLIBEXT) *$(PPLEXT)
-        -$(DEL) $(addsuffix /*$(PPUEXT),$(COMPILERSOURCEDIR))
+	-$(DEL) *$(OEXT) *$(PPUEXT) *$(RSTEXT) *$(ASMEXT) *$(STATICLIBEXT) *$(SHAREDLIBEXT) *$(PPLEXT)
+	-$(DEL) $(addsuffix /*$(PPUEXT),$(COMPILERSOURCEDIR))
 
 
 tempclean:
 tempclean:
-        -$(DEL) $(PPCROSSNAME) $(TEMPNAME) $(TEMPNAME1) $(TEMPNAME2) $(TEMPNAME3) $(MSG2INC) pp1.wpo pp2.wpo
+	-$(DEL) $(PPCROSSNAME) $(TEMPNAME) $(TEMPNAME1) $(TEMPNAME2) $(TEMPNAME3) $(MSG2INC) pp1.wpo pp2.wpo
 
 
 execlean :
 execlean :
-        -$(DEL) ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT)
-        -$(DEL) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) ppca64$(EXEEXT)
-        -$(DEL) $(EXENAME) $(TEMPWPONAME1) $(TEMPWPONAME2)
+	-$(DEL) $(addsuffix $(EXEEXT), $(addprefix ppc, $(PPC_SUFFIXES)))
+	-$(DEL) $(addsuffix $(EXEEXT), $(addprefix ppcross, $(PPC_SUFFIXES)))
+	-$(DEL) $(EXENAME) $(TEMPWPONAME1) $(TEMPWPONAME2) $(ALL_G_COMPILERS)
+	-$(DEL) fpcmade.generate_g_compilers
 
 
 $(addsuffix _clean,$(ALLTARGETS)):
 $(addsuffix _clean,$(ALLTARGETS)):
         -$(DELTREE) $(addprefix $(subst _clean,,$@),/units)
         -$(DELTREE) $(addprefix $(subst _clean,,$@),/units)
+        -$(DELTREE) $(addprefix $(subst _clean,,$@),/bin)
         -$(DEL) $(addprefix $(subst _clean,,$@)/,*$(OEXT) *$(PPUEXT) *$(RSTEXT) *$(ASMEXT) *$(STATICLIBEXT) *$(SHAREDLIBEXT) *$(PPLEXT))
         -$(DEL) $(addprefix $(subst _clean,,$@)/,*$(OEXT) *$(PPUEXT) *$(RSTEXT) *$(ASMEXT) *$(STATICLIBEXT) *$(SHAREDLIBEXT) *$(PPLEXT))
-        -$(DEL) $(addprefix $(subst _clean,,$@)/,ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT))
-        -$(DEL) $(addprefix $(subst _clean,,$@)/,ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) ppca64$(EXEEXT) $(EXENAME))
+        -$(DEL) $(addprefix $(subst _clean,,$@)/ppc,$(addsuffix $(EXEEXT), $(PPC_SUFFIXES)))
 
 
 cycleclean: cleanall $(addsuffix _clean,$(CPC_TARGET))
 cycleclean: cleanall $(addsuffix _clean,$(CPC_TARGET))
         -$(DEL) $(EXENAME)
         -$(DEL) $(EXENAME)
@@ -532,10 +714,16 @@ regdatarm : arm/armreg.dat
 	    $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkarmreg.pp
 	    $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkarmreg.pp
         cd arm && ..$(PATHSEP)utils$(PATHSEP)mkarmreg$(SRCEXEEXT)
         cd arm && ..$(PATHSEP)utils$(PATHSEP)mkarmreg$(SRCEXEEXT)
 
 
-regdatsp : sparc/spreg.dat
+regdatsp : sparcgen/spreg.dat
             $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkspreg.pp
             $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkspreg.pp
-        cd sparc && ..$(PATHSEP)utils$(PATHSEP)mkspreg$(SRCEXEEXT)
+        cd sparcgen && ..$(PATHSEP)utils$(PATHSEP)mkspreg$(SRCEXEEXT)
+		mv -f sparcgen/rsp*.inc sparc
 
 
+regdatsp64 : sparcgen/spreg.dat
+            $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkspreg.pp
+        cd sparcgen && ..$(PATHSEP)utils$(PATHSEP)mkspreg$(SRCEXEEXT) sparc64
+		mv -f sparcgen/rsp*.inc sparc64
+		
 regdatavr : avr/avrreg.dat
 regdatavr : avr/avrreg.dat
             $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkavrreg.pp
             $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkavrreg.pp
         cd avr && ..$(PATHSEP)utils$(PATHSEP)mkavrreg$(SRCEXEEXT)
         cd avr && ..$(PATHSEP)utils$(PATHSEP)mkavrreg$(SRCEXEEXT)
@@ -548,6 +736,8 @@ regdatmips : mips/mipsreg.dat
 	    $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkmpsreg.pp
 	    $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkmpsreg.pp
         cd mips && ..$(PATHSEP)utils$(PATHSEP)mkmpsreg$(SRCEXEEXT)
         cd mips && ..$(PATHSEP)utils$(PATHSEP)mkmpsreg$(SRCEXEEXT)
 
 
+regdat : regdatx86 regdatarm regdatsp regdatavr regdataarch64 regdatmips regdatsp64 regdatz80
+
 regdatz80 : z80/z80reg.dat
 regdatz80 : z80/z80reg.dat
             $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkz80reg.pp
             $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkz80reg.pp
         cd z80 && ..$(PATHSEP)utils$(PATHSEP)mkz80reg$(SRCEXEEXT)
         cd z80 && ..$(PATHSEP)utils$(PATHSEP)mkz80reg$(SRCEXEEXT)
@@ -598,7 +788,6 @@ endif
         $(EXECPPAS)
         $(EXECPPAS)
         $(MOVE) $(COMPILER_TARGETDIR)/$(PPEXENAME) $(EXENAME)
         $(MOVE) $(COMPILER_TARGETDIR)/$(PPEXENAME) $(EXENAME)
 
 
-
 #####################################################################
 #####################################################################
 # Cycle targets
 # Cycle targets
 #
 #
@@ -629,22 +818,27 @@ ifeq ($(OS_SOURCE),$(OS_TARGET))
 ifndef NOWPOCYCLE
 ifndef NOWPOCYCLE
 ifdef RELEASE
 ifdef RELEASE
 DOWPOCYCLE=1
 DOWPOCYCLE=1
+endif
+endif
+
+ifdef DOWPOCYCLE
 # Two WPO cycles in case of RELEASE=1
 # Two WPO cycles in case of RELEASE=1
 wpocycle:
 wpocycle:
 # don't use cycle_clean, it will delete the compiler utilities again
 # don't use cycle_clean, it will delete the compiler utilities again
         $(RM) $(EXENAME)
         $(RM) $(EXENAME)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OPT=$(strip $(LOCALOPT) $(OPTWPOCOLLECT) $(OPTNEW))' compiler
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3PREFIX)$(TEMPNAME3)' 'OPT=$(strip $(LOCALOPT) $(OPTWPOCOLLECT) $(OPTNEW))' compiler
         $(RM) $(EXENAME)
         $(RM) $(EXENAME)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OPT=$(strip $(RTLOPT) $(OPTWPOPERFORM) $(OPTNEW))' rtlclean rtl
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(OPTWPOPERFORM) $(subst pp1.wpo,pp2.wpo,$(OPTWPOCOLLECT)))' $(addsuffix _clean,$(ALLTARGETS)) compiler
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3PREFIX)$(TEMPNAME3)' 'OPT=$(strip $(RTLOPT) $(OPTWPOPERFORM) $(OPTNEW))' rtlclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3PREFIX)$(TEMPNAME3)' 'OPT=$(strip $(RTLOPT) $(OPTWPOPERFORM) $(OPTNEW))' rtl
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3PREFIX)$(TEMPNAME3)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(OPTWPOPERFORM) $(subst pp1.wpo,pp2.wpo,$(OPTWPOCOLLECT)))' $(addsuffix _clean,$(ALLTARGETS)) 
+	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3PREFIX)$(TEMPNAME3)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(OPTWPOPERFORM) $(subst pp1.wpo,pp2.wpo,$(OPTWPOCOLLECT)))' compiler
         $(MOVE) $(EXENAME) $(TEMPWPONAME1)
         $(MOVE) $(EXENAME) $(TEMPWPONAME1)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1)' 'OPT=$(strip $(RTLOPT) $(OPTNEW) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM)))' rtlclean rtl
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM)))' $(addsuffix _clean,$(ALLTARGETS)) compiler
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1PREFIX)$(TEMPWPONAME1)' 'OPT=$(strip $(RTLOPT) $(OPTNEW) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM)))' rtlclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1PREFIX)$(TEMPWPONAME1)' 'OPT=$(strip $(RTLOPT) $(OPTNEW) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM)))' rtl
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1PREFIX)$(TEMPWPONAME1)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM)))' $(addsuffix _clean,$(ALLTARGETS))
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1PREFIX)$(TEMPWPONAME1)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM)))' compiler
         $(COPY) $(EXENAME) $(TEMPWPONAME2)
         $(COPY) $(EXENAME) $(TEMPWPONAME2)
-endif
-endif
-
-ifndef DOWPOCYCLE
+else
 wpocycle:
 wpocycle:
 endif
 endif
 
 
@@ -669,8 +863,10 @@ next :
         $(COPY) $(FPC) $(EXENAME)
         $(COPY) $(FPC) $(EXENAME)
 else
 else
 next :
 next :
-        $(MAKE) rtlclean rtl
-        $(MAKE) cycleclean compiler
+        $(MAKE) rtlclean
+        $(MAKE) rtl
+        $(MAKE) cycleclean
+        $(MAKE) compiler
         $(MAKE) echotime
         $(MAKE) echotime
 endif
 endif
 
 
@@ -680,20 +876,24 @@ $(TEMPNAME1) :
         $(MOVE) $(EXENAME) $(TEMPNAME1)
         $(MOVE) $(EXENAME) $(TEMPNAME1)
 
 
 $(TEMPNAME2) : $(TEMPNAME1)
 $(TEMPNAME2) : $(TEMPNAME1)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME1)' 'OLDFPC=' next CYCLELEVEL=2
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME1PREFIX)$(TEMPNAME1)' 'OLDFPC=' next CYCLELEVEL=2
         -$(DEL) $(TEMPNAME2)
         -$(DEL) $(TEMPNAME2)
         $(MOVE) $(EXENAME) $(TEMPNAME2)
         $(MOVE) $(EXENAME) $(TEMPNAME2)
 
 
 $(TEMPNAME3) : $(TEMPNAME2)
 $(TEMPNAME3) : $(TEMPNAME2)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME2)' 'OLDFPC=$(BASEDIR)/$(TEMPNAME1)' next CYCLELEVEL=3
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME2PREFIX)$(TEMPNAME2)' 'OLDFPC=$(BASEDIR)/$(TEMPNAME1)' next CYCLELEVEL=3
         -$(DEL) $(TEMPNAME3)
         -$(DEL) $(TEMPNAME3)
         $(MOVE) $(EXENAME) $(TEMPNAME3)
         $(MOVE) $(EXENAME) $(TEMPNAME3)
 
 
 cycle:
 cycle:
-        $(MAKE) tempclean $(TEMPNAME3)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OLDFPC=$(BASEDIR)/$(TEMPNAME2)' next CYCLELEVEL=4
+ifdef NEED_G_COMPILERS
+	$(MAKE) fpcmade.generate_g_compilers
+endif
+        $(MAKE) tempclean
+        $(MAKE) $(TEMPNAME3)
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3PREFIX)$(TEMPNAME3)' 'OLDFPC=$(BASEDIR)/$(TEMPNAME2)' next CYCLELEVEL=4
         $(DIFF) $(TEMPNAME3) $(EXENAME)
         $(DIFF) $(TEMPNAME3) $(EXENAME)
-        $(MAKE) $(addsuffix _all,$(TARGET_DIRS)) 'FPC=$(BASEDIR)/$(EXENAME)'
+        $(MAKE) $(addsuffix _all,$(TARGET_DIRS)) 'FPC=$(BASEDIR)/$(EXENAMEPREFIX)$(EXENAME)'
         $(MAKE) wpocycle
         $(MAKE) wpocycle
         $(MAKE) echotime
         $(MAKE) echotime
 
 
@@ -704,17 +904,26 @@ else
 #
 #
 
 
 cycle:
 cycle:
+ifdef NEED_G_COMPILERS
+	$(MAKE) fpcmade.generate_g_compilers
+endif
 # ppc (source native)
 # ppc (source native)
-        $(MAKE) OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=1
-        $(MAKE) OS_TARGET=$(OS_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=1
+        $(MAKE) OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 rtlclean
+        $(MAKE) OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 rtl
+        $(MAKE) OS_TARGET=$(OS_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 cycleclean
+        $(MAKE) OS_TARGET=$(OS_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 compiler
 # ppcross<ARCH> (source native)
 # ppcross<ARCH> (source native)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=2
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=2
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 rtlclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 rtl
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 cycleclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 compiler
 # ppc<ARCH> (target native)
 # ppc<ARCH> (target native)
 ifndef CROSSINSTALL
 ifndef CROSSINSTALL
-        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' rtlclean rtl CYCLELEVEL=3
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' CYCLELEVEL=3 rtlclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' CYCLELEVEL=3 rtl
 ifndef NoNativeBinaries
 ifndef NoNativeBinaries
-        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' cycleclean compiler CYCLELEVEL=3
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' CYCLELEVEL=3 cycleclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' CYCLELEVEL=3 compiler
 endif
 endif
 endif
 endif
 
 
@@ -732,18 +941,27 @@ else
 
 
 cycle: override FPC=
 cycle: override FPC=
 cycle:
 cycle:
+ifdef NEED_G_COMPILERS
+	$(MAKE) fpcmade.generate_g_compilers
+endif
 # ppc (source native)
 # ppc (source native)
 # Clear the detected compiler binary, because it can be an existing cross-compiler binary, but we need a native compiler here
 # Clear the detected compiler binary, because it can be an existing cross-compiler binary, but we need a native compiler here
-        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=1
-        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=1
+        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 rtlclean
+        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 rtl
+        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 cycleclean 
+        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 compiler 
 # ppcross<ARCH> (source native)
 # ppcross<ARCH> (source native)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=2
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) PPC_TARGET=$(CPU_TARGET) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=2
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 rtlclean 
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 rtl 
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) PPC_TARGET=$(CPU_TARGET) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 cycleclean 
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) PPC_TARGET=$(CPU_TARGET) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 compiler 
 # ppc<ARCH> (target native)
 # ppc<ARCH> (target native)
 ifndef CROSSINSTALL
 ifndef CROSSINSTALL
-        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(RTLOPT) $(CROSSOPT)' rtlclean rtl CYCLELEVEL=3
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' 'OPT=$(RTLOPT) $(CROSSOPT)' CYCLELEVEL=3 rtlclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' 'OPT=$(RTLOPT) $(CROSSOPT)' CYCLELEVEL=3 rtl
 ifndef NoNativeBinaries
 ifndef NoNativeBinaries
-        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(strip $(LOCALOPT) $(CROSSOPT))' cycleclean compiler CYCLELEVEL=3
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' 'OPT=$(strip $(LOCALOPT) $(CROSSOPT))' CYCLELEVEL=3 cycleclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' 'OPT=$(strip $(LOCALOPT) $(CROSSOPT))' CYCLELEVEL=3 compiler
 endif
 endif
 endif
 endif
 
 
@@ -766,7 +984,8 @@ cvstest:
 #
 #
 # 1. build a compiler using cycle
 # 1. build a compiler using cycle
 # 2. remove all .ppufiles
 # 2. remove all .ppufiles
-# 3. build all supported cross compilers except the
+# 3. clean and recompile rtl if DOWPOCYCLE is set
+# 4. build all supported cross compilers except the
 #    current PPC_TARGET which was already built
 #    current PPC_TARGET which was already built
 # unless FPC_SUPPORT_X87_TYPES_ON_WIN64 is set,
 # unless FPC_SUPPORT_X87_TYPES_ON_WIN64 is set,
 # win64 cannot compile i386 or i8086 compiler
 # win64 cannot compile i386 or i8086 compiler
@@ -777,19 +996,24 @@ ifeq ($(OS_SOURCE),win64)
   EXCLUDE_80BIT_TARGETS=1
   EXCLUDE_80BIT_TARGETS=1
 endif
 endif
 
 
-ifneq ($(findstring $(CPU_SOURCE),aarch64 arm avr jvm m68k mips mipsel powerpc powerpc64 sparc),)
+ifneq ($(findstring $(CPU_SOURCE),aarch64 arm avr jvm m68k mips mipsel powerpc powerpc64 sparc sparc64 riscv32 riscv64 xtensa),)
   EXCLUDE_80BIT_TARGETS=1
   EXCLUDE_80BIT_TARGETS=1
 endif
 endif
 
 
 full: fullcycle
 full: fullcycle
 
 
 fullcycle:
 fullcycle:
+        $(MAKE) distclean
         $(MAKE) cycle
         $(MAKE) cycle
         $(MAKE) ppuclean
         $(MAKE) ppuclean
+ifdef DOWPOCYCLE
+        $(MAKE) rtlclean
+        $(MAKE) rtl 'FPC=$(BASEDIR)/$(EXENAMEPREFIX)$(EXENAME)'
+endif
 ifndef EXCLUDE_80BIT_TARGETS
 ifndef EXCLUDE_80BIT_TARGETS
-        $(MAKE) $(filter-out $(PPC_TARGET),$(CYCLETARGETS)) 'FPC=$(BASEDIR)/$(EXENAME)'
+        $(MAKE) $(filter-out $(PPC_TARGET),$(CYCLETARGETS)) 'FPC=$(BASEDIR)/$(EXENAMEPREFIX)$(EXENAME)'
 else
 else
-        $(MAKE) $(filter-out i8086,$(filter-out i386,$(filter-out $(PPC_TARGET),$(CYCLETARGETS)))) 'FPC=$(BASEDIR)/$(EXENAME)'
+        $(MAKE) $(filter-out i8086,$(filter-out i386,$(filter-out $(PPC_TARGET),$(CYCLETARGETS)))) 'FPC=$(BASEDIR)/$(EXENAMEPREFIX)$(EXENAME)'
 endif
 endif
 
 
 #####################################################################
 #####################################################################
@@ -837,12 +1061,13 @@ endif
 
 
 fullinstall:
 fullinstall:
 ifndef EXCLUDE_80BIT_TARGETS
 ifndef EXCLUDE_80BIT_TARGETS
-        $(MAKE) $(addsuffix _exe_install,$(filter-out $(PPC_TARGET),$(CYCLETARGETS)))
+        $(MAKE) $(addsuffix _exe_install,$(filter-out $(PPC_TARGET),$(CYCLETARGETS))) $(addsuffix _all,$(TARGET_DIRS))
 else
 else
-        $(MAKE) $(addsuffix _exe_install,$(filter-out i8086,$(filter-out i386,$(filter-out $(PPC_TARGET),$(CYCLETARGETS)))))
+        $(MAKE) $(addsuffix _exe_install,$(filter-out i8086,$(filter-out i386,$(filter-out $(PPC_TARGET),$(CYCLETARGETS))))) $(addsuffix _all,$(TARGET_DIRS))
 endif
 endif
-
-install: quickinstall
+        $(MAKE) $(addsuffix _install,$(TARGET_DIRS))
+        
+auxfilesinstall:
 ifndef CROSSINSTALL
 ifndef CROSSINSTALL
 ifdef UNIXHier
 ifdef UNIXHier
         $(MKDIR) $(INSTALL_BASEDIR)
         $(MKDIR) $(INSTALL_BASEDIR)
@@ -851,6 +1076,15 @@ endif
         $(MKDIR) $(MSGINSTALLDIR)
         $(MKDIR) $(MSGINSTALLDIR)
         $(INSTALL) $(MSGFILES) $(MSGINSTALLDIR)
         $(INSTALL) $(MSGFILES) $(MSGINSTALLDIR)
 endif
 endif
+	
+
+install: 
+# if no FPC is passed, use the one we assume we just built
+ifndef FPC
+	$(MAKE) quickinstall auxfilesinstall FPC=$(BASEDIR)/$(INSTALLEXEFILE)
+else
+	$(MAKE) quickinstall auxfilesinstall
+endif
 
 
 # This also installs a link from bin to the actual executable.
 # This also installs a link from bin to the actual executable.
 # The .deb does that later.
 # The .deb does that later.
@@ -892,7 +1126,7 @@ ALLPPUDIR=$(CPU_TARGET)/units/*
 PPULIST=$(wildcard $(PPUDIR)/*.ppu)
 PPULIST=$(wildcard $(PPUDIR)/*.ppu)
 PPULOGLIST=$(subst .ppu,.log-ppu,$(PPULIST))
 PPULOGLIST=$(subst .ppu,.log-ppu,$(PPULIST))
 
 
-RTLPPUDIR=../rtl/units/$(FULL_TARGET)
+RTLPPUDIR=../rtl/units/$(TARGETSUFFIX)
 RTLPPULIST=$(wildcard $(RTLPPUDIR)/*.ppu)
 RTLPPULIST=$(wildcard $(RTLPPUDIR)/*.ppu)
 RTLPPULOGLIST=$(subst .ppu,.log-ppu,$(RTLPPULIST))
 RTLPPULOGLIST=$(subst .ppu,.log-ppu,$(RTLPPULIST))
 
 
@@ -905,8 +1139,9 @@ rtlppulogs : $(RTLPPULOGLIST)
 vpath %.ppu $(PPUDIR) $(RTLPPUDIR) $(ALLPPUDIR)
 vpath %.ppu $(PPUDIR) $(RTLPPUDIR) $(ALLPPUDIR)
 vpath %.log-ppu $(PPUDIR) $(RTLPPUDIR) $(ALLPPUDIR)
 vpath %.log-ppu $(PPUDIR) $(RTLPPUDIR) $(ALLPPUDIR)
 
 
-%.log-ppu : %.ppu ./utils/ppudump$(EXEEXT)
-	.$(PATHSEP)utils$(PATHSEP)ppudump -VA -M $< > $@
+# Use installed ppudump
+%.log-ppu : %.ppu
+	$(PPUDUMP) -VA -M $< > $@
 
 
 
 
 ./utils/ppudump$(EXEEXT):
 ./utils/ppudump$(EXEEXT):
@@ -915,6 +1150,8 @@ vpath %.log-ppu $(PPUDIR) $(RTLPPUDIR) $(ALLPPUDIR)
 ppuinfo :
 ppuinfo :
 	echo PPU list is "$(PPULIST)"
 	echo PPU list is "$(PPULIST)"
 	echo PPULOG list is "$(PPULOGLIST)"
 	echo PPULOG list is "$(PPULOGLIST)"
+	echo RTLPPUDIR is "$(RTLPPUDIR)"
+	echo RTLPPU list is "$(RTLPPULIST)"
 
 
 cleanppulogs :
 cleanppulogs :
 	-$(RMPROG) $(PPULOGLIST)
 	-$(RMPROG) $(PPULOGLIST)

+ 2 - 1
compiler/aarch64/a64att.inc

@@ -187,5 +187,6 @@
 'fcsel',
 'fcsel',
 'umov',
 'umov',
 'ins',
 'ins',
-'movi'
+'movi',
+'veor'
 );
 );

+ 1 - 0
compiler/aarch64/a64atts.inc

@@ -187,5 +187,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
 attsufNONE
 attsufNONE
 );
 );

+ 2 - 0
compiler/aarch64/a64ins.dat

@@ -376,3 +376,5 @@
 [INS]
 [INS]
 
 
 [MOVI]
 [MOVI]
+
+[VEOR]

+ 2 - 1
compiler/aarch64/a64op.inc

@@ -187,5 +187,6 @@ A_FCMMPE,
 A_FCSEL,
 A_FCSEL,
 A_UMOV,
 A_UMOV,
 A_INS,
 A_INS,
-A_MOVI
+A_MOVI,
+A_VEOR
 );
 );

+ 67 - 2
compiler/aarch64/a64reg.dat

@@ -76,170 +76,235 @@ XZR,$01,$05,$1F,xzr,31,31
 WSP,$01,$04,$20,wsp,31,31
 WSP,$01,$04,$20,wsp,31,31
 SP,$01,$05,$20,sp,31,31
 SP,$01,$05,$20,sp,31,31
 
 
-
 ; vfp registers
 ; vfp registers
+; generated by fpc/compiler/utils/gena64vfp.pp to avoid tedious typing
 B0,$04,$01,$00,b0,64,64
 B0,$04,$01,$00,b0,64,64
 H0,$04,$03,$00,h0,64,64
 H0,$04,$03,$00,h0,64,64
 S0,$04,$09,$00,s0,64,64
 S0,$04,$09,$00,s0,64,64
 D0,$04,$0a,$00,d0,64,64
 D0,$04,$0a,$00,d0,64,64
 Q0,$04,$05,$00,q0,64,64
 Q0,$04,$05,$00,q0,64,64
+V08B,$04,$17,$00,v0.8b,64,64
+V016B,$04,$18,$00,v0.16b,64,64
 B1,$04,$01,$01,b1,65,65
 B1,$04,$01,$01,b1,65,65
 H1,$04,$03,$01,h1,65,65
 H1,$04,$03,$01,h1,65,65
 S1,$04,$09,$01,s1,65,65
 S1,$04,$09,$01,s1,65,65
 D1,$04,$0a,$01,d1,65,65
 D1,$04,$0a,$01,d1,65,65
 Q1,$04,$05,$01,q1,65,65
 Q1,$04,$05,$01,q1,65,65
+V18B,$04,$17,$01,v1.8b,65,65
+V116B,$04,$18,$01,v1.16b,65,65
 B2,$04,$01,$02,b2,66,66
 B2,$04,$01,$02,b2,66,66
 H2,$04,$03,$02,h2,66,66
 H2,$04,$03,$02,h2,66,66
 S2,$04,$09,$02,s2,66,66
 S2,$04,$09,$02,s2,66,66
 D2,$04,$0a,$02,d2,66,66
 D2,$04,$0a,$02,d2,66,66
 Q2,$04,$05,$02,q2,66,66
 Q2,$04,$05,$02,q2,66,66
+V28B,$04,$17,$02,v2.8b,66,66
+V216B,$04,$18,$02,v2.16b,66,66
 B3,$04,$01,$03,b3,67,67
 B3,$04,$01,$03,b3,67,67
 H3,$04,$03,$03,h3,67,67
 H3,$04,$03,$03,h3,67,67
 S3,$04,$09,$03,s3,67,67
 S3,$04,$09,$03,s3,67,67
 D3,$04,$0a,$03,d3,67,67
 D3,$04,$0a,$03,d3,67,67
 Q3,$04,$05,$03,q3,67,67
 Q3,$04,$05,$03,q3,67,67
+V38B,$04,$17,$03,v3.8b,67,67
+V316B,$04,$18,$03,v3.16b,67,67
 B4,$04,$01,$04,b4,68,68
 B4,$04,$01,$04,b4,68,68
 H4,$04,$03,$04,h4,68,68
 H4,$04,$03,$04,h4,68,68
 S4,$04,$09,$04,s4,68,68
 S4,$04,$09,$04,s4,68,68
 D4,$04,$0a,$04,d4,68,68
 D4,$04,$0a,$04,d4,68,68
 Q4,$04,$05,$04,q4,68,68
 Q4,$04,$05,$04,q4,68,68
+V48B,$04,$17,$04,v4.8b,68,68
+V416B,$04,$18,$04,v4.16b,68,68
 B5,$04,$01,$05,b5,69,69
 B5,$04,$01,$05,b5,69,69
 H5,$04,$03,$05,h5,69,69
 H5,$04,$03,$05,h5,69,69
 S5,$04,$09,$05,s5,69,69
 S5,$04,$09,$05,s5,69,69
 D5,$04,$0a,$05,d5,69,69
 D5,$04,$0a,$05,d5,69,69
 Q5,$04,$05,$05,q5,69,69
 Q5,$04,$05,$05,q5,69,69
+V58B,$04,$17,$05,v5.8b,69,69
+V516B,$04,$18,$05,v5.16b,69,69
 B6,$04,$01,$06,b6,70,70
 B6,$04,$01,$06,b6,70,70
 H6,$04,$03,$06,h6,70,70
 H6,$04,$03,$06,h6,70,70
-S6,$04,$09,$06,s6,70,70
+S6,$04,$09,$06,s6,70,70                                                                     gena64vfp.pp
 D6,$04,$0a,$06,d6,70,70
 D6,$04,$0a,$06,d6,70,70
 Q6,$04,$05,$06,q6,70,70
 Q6,$04,$05,$06,q6,70,70
+V68B,$04,$17,$06,v6.8b,70,70
+V616B,$04,$18,$06,v6.16b,70,70
 B7,$04,$01,$07,b7,71,71
 B7,$04,$01,$07,b7,71,71
 H7,$04,$03,$07,h7,71,71
 H7,$04,$03,$07,h7,71,71
 S7,$04,$09,$07,s7,71,71
 S7,$04,$09,$07,s7,71,71
 D7,$04,$0a,$07,d7,71,71
 D7,$04,$0a,$07,d7,71,71
 Q7,$04,$05,$07,q7,71,71
 Q7,$04,$05,$07,q7,71,71
+V78B,$04,$17,$07,v7.8b,71,71
+V716B,$04,$18,$07,v7.16b,71,71
 B8,$04,$01,$08,b8,72,72
 B8,$04,$01,$08,b8,72,72
 H8,$04,$03,$08,h8,72,72
 H8,$04,$03,$08,h8,72,72
 S8,$04,$09,$08,s8,72,72
 S8,$04,$09,$08,s8,72,72
 D8,$04,$0a,$08,d8,72,72
 D8,$04,$0a,$08,d8,72,72
 Q8,$04,$05,$08,q8,72,72
 Q8,$04,$05,$08,q8,72,72
+V88B,$04,$17,$08,v8.8b,72,72
+V816B,$04,$18,$08,v8.16b,72,72
 B9,$04,$01,$09,b9,73,73
 B9,$04,$01,$09,b9,73,73
 H9,$04,$03,$09,h9,73,73
 H9,$04,$03,$09,h9,73,73
 S9,$04,$09,$09,s9,73,73
 S9,$04,$09,$09,s9,73,73
 D9,$04,$0a,$09,d9,73,73
 D9,$04,$0a,$09,d9,73,73
 Q9,$04,$05,$09,q9,73,73
 Q9,$04,$05,$09,q9,73,73
+V98B,$04,$17,$09,v9.8b,73,73
+V916B,$04,$18,$09,v9.16b,73,73
 B10,$04,$01,$0A,b10,74,74
 B10,$04,$01,$0A,b10,74,74
 H10,$04,$03,$0A,h10,74,74
 H10,$04,$03,$0A,h10,74,74
 S10,$04,$09,$0A,s10,74,74
 S10,$04,$09,$0A,s10,74,74
 D10,$04,$0a,$0A,d10,74,74
 D10,$04,$0a,$0A,d10,74,74
 Q10,$04,$05,$0A,q10,74,74
 Q10,$04,$05,$0A,q10,74,74
+V108B,$04,$17,$0A,v10.8b,74,74
+V1016B,$04,$18,$0A,v10.16b,74,74
 B11,$04,$01,$0B,b11,75,75
 B11,$04,$01,$0B,b11,75,75
 H11,$04,$03,$0B,h11,75,75
 H11,$04,$03,$0B,h11,75,75
 S11,$04,$09,$0B,s11,75,75
 S11,$04,$09,$0B,s11,75,75
 D11,$04,$0a,$0B,d11,75,75
 D11,$04,$0a,$0B,d11,75,75
 Q11,$04,$05,$0B,q11,75,75
 Q11,$04,$05,$0B,q11,75,75
+V118B,$04,$17,$0B,v11.8b,75,75
+V1116B,$04,$18,$0B,v11.16b,75,75
 B12,$04,$01,$0C,b12,76,76
 B12,$04,$01,$0C,b12,76,76
 H12,$04,$03,$0C,h12,76,76
 H12,$04,$03,$0C,h12,76,76
 S12,$04,$09,$0C,s12,76,76
 S12,$04,$09,$0C,s12,76,76
 D12,$04,$0a,$0C,d12,76,76
 D12,$04,$0a,$0C,d12,76,76
 Q12,$04,$05,$0C,q12,76,76
 Q12,$04,$05,$0C,q12,76,76
+V128B,$04,$17,$0C,v12.8b,76,76
+V1216B,$04,$18,$0C,v12.16b,76,76
 B13,$04,$01,$0D,b13,77,77
 B13,$04,$01,$0D,b13,77,77
 H13,$04,$03,$0D,h13,77,77
 H13,$04,$03,$0D,h13,77,77
 S13,$04,$09,$0D,s13,77,77
 S13,$04,$09,$0D,s13,77,77
 D13,$04,$0a,$0D,d13,77,77
 D13,$04,$0a,$0D,d13,77,77
 Q13,$04,$05,$0D,q13,77,77
 Q13,$04,$05,$0D,q13,77,77
+V138B,$04,$17,$0D,v13.8b,77,77
+V1316B,$04,$18,$0D,v13.16b,77,77
 B14,$04,$01,$0E,b14,78,78
 B14,$04,$01,$0E,b14,78,78
 H14,$04,$03,$0E,h14,78,78
 H14,$04,$03,$0E,h14,78,78
 S14,$04,$09,$0E,s14,78,78
 S14,$04,$09,$0E,s14,78,78
 D14,$04,$0a,$0E,d14,78,78
 D14,$04,$0a,$0E,d14,78,78
 Q14,$04,$05,$0E,q14,78,78
 Q14,$04,$05,$0E,q14,78,78
+V148B,$04,$17,$0E,v14.8b,78,78
+V1416B,$04,$18,$0E,v14.16b,78,78
 B15,$04,$01,$0F,b15,79,79
 B15,$04,$01,$0F,b15,79,79
 H15,$04,$03,$0F,h15,79,79
 H15,$04,$03,$0F,h15,79,79
 S15,$04,$09,$0F,s15,79,79
 S15,$04,$09,$0F,s15,79,79
 D15,$04,$0a,$0F,d15,79,79
 D15,$04,$0a,$0F,d15,79,79
 Q15,$04,$05,$0F,q15,79,79
 Q15,$04,$05,$0F,q15,79,79
+V158B,$04,$17,$0F,v15.8b,79,79
+V1516B,$04,$18,$0F,v15.16b,79,79
 B16,$04,$01,$10,b16,80,80
 B16,$04,$01,$10,b16,80,80
 H16,$04,$03,$10,h16,80,80
 H16,$04,$03,$10,h16,80,80
 S16,$04,$09,$10,s16,80,80
 S16,$04,$09,$10,s16,80,80
 D16,$04,$0a,$10,d16,80,80
 D16,$04,$0a,$10,d16,80,80
 Q16,$04,$05,$10,q16,80,80
 Q16,$04,$05,$10,q16,80,80
+V168B,$04,$17,$10,v16.8b,80,80
+V1616B,$04,$18,$10,v16.16b,80,80
 B17,$04,$01,$11,b17,81,81
 B17,$04,$01,$11,b17,81,81
 H17,$04,$03,$11,h17,81,81
 H17,$04,$03,$11,h17,81,81
 S17,$04,$09,$11,s17,81,81
 S17,$04,$09,$11,s17,81,81
 D17,$04,$0a,$11,d17,81,81
 D17,$04,$0a,$11,d17,81,81
 Q17,$04,$05,$11,q17,81,81
 Q17,$04,$05,$11,q17,81,81
+V178B,$04,$17,$11,v17.8b,81,81
+V1716B,$04,$18,$11,v17.16b,81,81
 B18,$04,$01,$12,b18,82,82
 B18,$04,$01,$12,b18,82,82
 H18,$04,$03,$12,h18,82,82
 H18,$04,$03,$12,h18,82,82
 S18,$04,$09,$12,s18,82,82
 S18,$04,$09,$12,s18,82,82
 D18,$04,$0a,$12,d18,82,82
 D18,$04,$0a,$12,d18,82,82
 Q18,$04,$05,$12,q18,82,82
 Q18,$04,$05,$12,q18,82,82
+V188B,$04,$17,$12,v18.8b,82,82
+V1816B,$04,$18,$12,v18.16b,82,82
 B19,$04,$01,$13,b19,83,83
 B19,$04,$01,$13,b19,83,83
 H19,$04,$03,$13,h19,83,83
 H19,$04,$03,$13,h19,83,83
 S19,$04,$09,$13,s19,83,83
 S19,$04,$09,$13,s19,83,83
 D19,$04,$0a,$13,d19,83,83
 D19,$04,$0a,$13,d19,83,83
 Q19,$04,$05,$13,q19,83,83
 Q19,$04,$05,$13,q19,83,83
+V198B,$04,$17,$13,v19.8b,83,83
+V1916B,$04,$18,$13,v19.16b,83,83
 B20,$04,$01,$14,b20,84,84
 B20,$04,$01,$14,b20,84,84
 H20,$04,$03,$14,h20,84,84
 H20,$04,$03,$14,h20,84,84
 S20,$04,$09,$14,s20,84,84
 S20,$04,$09,$14,s20,84,84
 D20,$04,$0a,$14,d20,84,84
 D20,$04,$0a,$14,d20,84,84
 Q20,$04,$05,$14,q20,84,84
 Q20,$04,$05,$14,q20,84,84
+V208B,$04,$17,$14,v20.8b,84,84
+V2016B,$04,$18,$14,v20.16b,84,84
 B21,$04,$01,$15,b21,85,85
 B21,$04,$01,$15,b21,85,85
 H21,$04,$03,$15,h21,85,85
 H21,$04,$03,$15,h21,85,85
 S21,$04,$09,$15,s21,85,85
 S21,$04,$09,$15,s21,85,85
 D21,$04,$0a,$15,d21,85,85
 D21,$04,$0a,$15,d21,85,85
 Q21,$04,$05,$15,q21,85,85
 Q21,$04,$05,$15,q21,85,85
+V218B,$04,$17,$15,v21.8b,85,85
+V2116B,$04,$18,$15,v21.16b,85,85
 B22,$04,$01,$16,b22,86,86
 B22,$04,$01,$16,b22,86,86
 H22,$04,$03,$16,h22,86,86
 H22,$04,$03,$16,h22,86,86
 S22,$04,$09,$16,s22,86,86
 S22,$04,$09,$16,s22,86,86
 D22,$04,$0a,$16,d22,86,86
 D22,$04,$0a,$16,d22,86,86
 Q22,$04,$05,$16,q22,86,86
 Q22,$04,$05,$16,q22,86,86
+V228B,$04,$17,$16,v22.8b,86,86
+V2216B,$04,$18,$16,v22.16b,86,86
 B23,$04,$01,$17,b23,87,87
 B23,$04,$01,$17,b23,87,87
 H23,$04,$03,$17,h23,87,87
 H23,$04,$03,$17,h23,87,87
 S23,$04,$09,$17,s23,87,87
 S23,$04,$09,$17,s23,87,87
 D23,$04,$0a,$17,d23,87,87
 D23,$04,$0a,$17,d23,87,87
 Q23,$04,$05,$17,q23,87,87
 Q23,$04,$05,$17,q23,87,87
+V238B,$04,$17,$17,v23.8b,87,87
+V2316B,$04,$18,$17,v23.16b,87,87
 B24,$04,$01,$18,b24,88,88
 B24,$04,$01,$18,b24,88,88
 H24,$04,$03,$18,h24,88,88
 H24,$04,$03,$18,h24,88,88
 S24,$04,$09,$18,s24,88,88
 S24,$04,$09,$18,s24,88,88
 D24,$04,$0a,$18,d24,88,88
 D24,$04,$0a,$18,d24,88,88
 Q24,$04,$05,$18,q24,88,88
 Q24,$04,$05,$18,q24,88,88
+V248B,$04,$17,$18,v24.8b,88,88
+V2416B,$04,$18,$18,v24.16b,88,88
 B25,$04,$01,$19,b25,89,89
 B25,$04,$01,$19,b25,89,89
 H25,$04,$03,$19,h25,89,89
 H25,$04,$03,$19,h25,89,89
 S25,$04,$09,$19,s25,89,89
 S25,$04,$09,$19,s25,89,89
 D25,$04,$0a,$19,d25,89,89
 D25,$04,$0a,$19,d25,89,89
 Q25,$04,$05,$19,q25,89,89
 Q25,$04,$05,$19,q25,89,89
+V258B,$04,$17,$19,v25.8b,89,89
+V2516B,$04,$18,$19,v25.16b,89,89
 B26,$04,$01,$1A,b26,90,90
 B26,$04,$01,$1A,b26,90,90
 H26,$04,$03,$1A,h26,90,90
 H26,$04,$03,$1A,h26,90,90
 S26,$04,$09,$1A,s26,90,90
 S26,$04,$09,$1A,s26,90,90
 D26,$04,$0a,$1A,d26,90,90
 D26,$04,$0a,$1A,d26,90,90
 Q26,$04,$05,$1A,q26,90,90
 Q26,$04,$05,$1A,q26,90,90
+V268B,$04,$17,$1A,v26.8b,90,90
+V2616B,$04,$18,$1A,v26.16b,90,90
 B27,$04,$01,$1B,b27,91,91
 B27,$04,$01,$1B,b27,91,91
 H27,$04,$03,$1B,h27,91,91
 H27,$04,$03,$1B,h27,91,91
 S27,$04,$09,$1B,s27,91,91
 S27,$04,$09,$1B,s27,91,91
 D27,$04,$0a,$1B,d27,91,91
 D27,$04,$0a,$1B,d27,91,91
 Q27,$04,$05,$1B,q27,91,91
 Q27,$04,$05,$1B,q27,91,91
+V278B,$04,$17,$1B,v27.8b,91,91
+V2716B,$04,$18,$1B,v27.16b,91,91
 B28,$04,$01,$1C,b28,92,92
 B28,$04,$01,$1C,b28,92,92
 H28,$04,$03,$1C,h28,92,92
 H28,$04,$03,$1C,h28,92,92
 S28,$04,$09,$1C,s28,92,92
 S28,$04,$09,$1C,s28,92,92
 D28,$04,$0a,$1C,d28,92,92
 D28,$04,$0a,$1C,d28,92,92
 Q28,$04,$05,$1C,q28,92,92
 Q28,$04,$05,$1C,q28,92,92
+V288B,$04,$17,$1C,v28.8b,92,92
+V2816B,$04,$18,$1C,v28.16b,92,92
 B29,$04,$01,$1D,b29,93,93
 B29,$04,$01,$1D,b29,93,93
 H29,$04,$03,$1D,h29,93,93
 H29,$04,$03,$1D,h29,93,93
 S29,$04,$09,$1D,s29,93,93
 S29,$04,$09,$1D,s29,93,93
 D29,$04,$0a,$1D,d29,93,93
 D29,$04,$0a,$1D,d29,93,93
 Q29,$04,$05,$1D,q29,93,93
 Q29,$04,$05,$1D,q29,93,93
+V298B,$04,$17,$1D,v29.8b,93,93
+V2916B,$04,$18,$1D,v29.16b,93,93
 B30,$04,$01,$1E,b30,94,94
 B30,$04,$01,$1E,b30,94,94
 H30,$04,$03,$1E,h30,94,94
 H30,$04,$03,$1E,h30,94,94
 S30,$04,$09,$1E,s30,94,94
 S30,$04,$09,$1E,s30,94,94
 D30,$04,$0a,$1E,d30,94,94
 D30,$04,$0a,$1E,d30,94,94
 Q30,$04,$05,$1E,q30,94,94
 Q30,$04,$05,$1E,q30,94,94
+V308B,$04,$17,$1E,v30.8b,94,94
+V3016B,$04,$18,$1E,v30.16b,94,94
 B31,$04,$01,$1F,b31,95,95
 B31,$04,$01,$1F,b31,95,95
 H31,$04,$03,$1F,h31,95,95
 H31,$04,$03,$1F,h31,95,95
 S31,$04,$09,$1F,s31,95,95
 S31,$04,$09,$1F,s31,95,95
 D31,$04,$0a,$1F,d31,95,95
 D31,$04,$0a,$1F,d31,95,95
 Q31,$04,$05,$1F,q31,95,95
 Q31,$04,$05,$1F,q31,95,95
+V318B,$04,$17,$1F,v31.8b,95,95
+V3116B,$04,$18,$1F,v31.16b,95,95
 
 
 NZCV,$05,$00,$00,nzcv,0,0
 NZCV,$05,$00,$00,nzcv,0,0
 FPCR,$05,$00,$01,fpcr,0,0
 FPCR,$05,$00,$01,fpcr,0,0
 FPSR,$05,$00,$02,fpsr,0,0
 FPSR,$05,$00,$02,fpsr,0,0
 TPIDR_EL0,$05,$00,$03,tpidr_el0,0,0
 TPIDR_EL0,$05,$00,$03,tpidr_el0,0,0
+

+ 106 - 10
compiler/aarch64/aasmcpu.pas

@@ -157,6 +157,8 @@ uses
          oppostfix : TOpPostfix;
          oppostfix : TOpPostfix;
          procedure loadshifterop(opidx:longint;const so:tshifterop);
          procedure loadshifterop(opidx:longint;const so:tshifterop);
          procedure loadconditioncode(opidx: longint; const c: tasmcond);
          procedure loadconditioncode(opidx: longint; const c: tasmcond);
+         procedure loadrealconst(opidx: longint; const _value: bestreal);
+
          constructor op_none(op : tasmop);
          constructor op_none(op : tasmop);
 
 
          constructor op_reg(op : tasmop;_op1 : tregister);
          constructor op_reg(op : tasmop;_op1 : tregister);
@@ -168,6 +170,7 @@ uses
          constructor op_reg_cond(op: tasmop; _op1: tregister; _op2: tasmcond);
          constructor op_reg_cond(op: tasmop; _op1: tregister; _op2: tasmcond);
          constructor op_reg_const(op:tasmop; _op1: tregister; _op2: aint);
          constructor op_reg_const(op:tasmop; _op1: tregister; _op2: aint);
          constructor op_reg_const_shifterop(op : tasmop;_op1: tregister; _op2: aint;_op3 : tshifterop);
          constructor op_reg_const_shifterop(op : tasmop;_op1: tregister; _op2: aint;_op3 : tshifterop);
+         constructor op_reg_realconst(op: tasmop; _op1: tregister; _op2: bestreal);
 
 
          constructor op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
          constructor op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
          constructor op_reg_reg_reg_reg(op : tasmop;_op1,_op2,_op3,_op4 : tregister);
          constructor op_reg_reg_reg_reg(op : tasmop;_op1,_op2,_op3,_op4 : tregister);
@@ -180,7 +183,6 @@ uses
          constructor op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister; const _op4 : tshifterop);
          constructor op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister; const _op4 : tshifterop);
          constructor op_reg_reg_reg_cond(op : tasmop;_op1,_op2,_op3 : tregister; const _op4: tasmcond);
          constructor op_reg_reg_reg_cond(op : tasmop;_op1,_op2,_op3 : tregister; const _op4: tasmcond);
 
 
-
          { this is for Jmp instructions }
          { this is for Jmp instructions }
          constructor op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
          constructor op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
 
 
@@ -280,6 +282,19 @@ implementation
       end;
       end;
 
 
 
 
+    procedure taicpu.loadrealconst(opidx:longint;const _value:bestreal);
+      begin
+        allocate_oper(opidx+1);
+        with oper[opidx]^ do
+          begin
+            if typ<>top_realconst then
+              clearop(opidx);
+            val_real:=_value;
+            typ:=top_realconst;
+          end;
+      end;
+
+
 {*****************************************************************************
 {*****************************************************************************
                                  taicpu Constructors
                                  taicpu Constructors
 *****************************************************************************}
 *****************************************************************************}
@@ -382,6 +397,15 @@ implementation
       end;
       end;
 
 
 
 
+    constructor taicpu.op_reg_realconst(op : tasmop; _op1 : tregister; _op2 : bestreal);
+      begin
+         inherited create(op);
+         ops:=2;
+         loadreg(0,_op1);
+         loadrealconst(1,_op2);
+      end;
+
+
      constructor taicpu.op_reg_reg_const(op : tasmop;_op1,_op2 : tregister; _op3: aint);
      constructor taicpu.op_reg_reg_const(op : tasmop;_op1,_op2 : tregister; _op3: aint);
        begin
        begin
          inherited create(op);
          inherited create(op);
@@ -528,7 +552,7 @@ implementation
       const
       const
         { invalid sizes for aarch64 are 0 }
         { invalid sizes for aarch64 are 0 }
         subreg2bytesize: array[TSubRegister] of byte =
         subreg2bytesize: array[TSubRegister] of byte =
-          (0,0,0,0,4,8,0,0,0,4,8,0,0,0);
+          (0,0,0,0,4,8,0,0,0,4,8,0,0,0,0,0,0,0,0,0,0,0,0,8,16,0);
       var
       var
         scalefactor: byte;
         scalefactor: byte;
       begin
       begin
@@ -554,16 +578,17 @@ implementation
       begin
       begin
         result:=sr_complex;
         result:=sr_complex;
         if not assigned(ref.symboldata) and
         if not assigned(ref.symboldata) and
-           not(ref.refaddr in [addr_gotpageoffset,addr_gotpage,addr_pageoffset,addr_page]) then
+           not(ref.refaddr in [addr_pic,addr_gotpageoffset,addr_gotpage,addr_pageoffset,addr_page]) then
           exit;
           exit;
         { can't use pre-/post-indexed mode here (makes no sense either) }
         { can't use pre-/post-indexed mode here (makes no sense either) }
         if ref.addressmode<>AM_OFFSET then
         if ref.addressmode<>AM_OFFSET then
           exit;
           exit;
         { "ldr literal" must be a 32/64 bit LDR and have a symbol }
         { "ldr literal" must be a 32/64 bit LDR and have a symbol }
-        if assigned(ref.symboldata) and
+        if (ref.refaddr=addr_pic) and
            ((op<>A_LDR) or
            ((op<>A_LDR) or
             not(oppostfix in [PF_NONE,PF_W,PF_SW]) or
             not(oppostfix in [PF_NONE,PF_W,PF_SW]) or
-            not assigned(ref.symbol)) then
+            (not assigned(ref.symbol) and
+             not assigned(ref.symboldata))) then
           exit;
           exit;
         { if this is a (got) page offset load, we must have a base register and a
         { if this is a (got) page offset load, we must have a base register and a
           symbol }
           symbol }
@@ -592,7 +617,6 @@ implementation
 
 
     function simple_ref_type(op: tasmop; size:tcgsize; oppostfix: toppostfix; const ref: treference): tsimplereftype;
     function simple_ref_type(op: tasmop; size:tcgsize; oppostfix: toppostfix; const ref: treference): tsimplereftype;
       var
       var
-        maxoffs: asizeint;
         accesssize: longint;
         accesssize: longint;
       begin
       begin
         result:=sr_internal_illegal;
         result:=sr_internal_illegal;
@@ -867,10 +891,13 @@ implementation
     function taicpu.spilling_get_operation_type(opnr: longint): topertype;
     function taicpu.spilling_get_operation_type(opnr: longint): topertype;
       begin
       begin
         case opcode of
         case opcode of
-          A_B,A_BL,
+          A_B,A_BL,A_BR,A_BLR,
           A_CMN,A_CMP,
           A_CMN,A_CMP,
           A_CCMN,A_CCMP,
           A_CCMN,A_CCMP,
-          A_TST:
+          A_TST,
+          A_FCMP,A_FCMPE,
+          A_CBZ,A_CBNZ,
+          A_RET:
             result:=operand_read;
             result:=operand_read;
           A_STR,A_STUR:
           A_STR,A_STUR:
             if opnr=0 then
             if opnr=0 then
@@ -903,11 +930,78 @@ implementation
                  { check for pre/post indexed in spilling_get_operation_type_ref }
                  { check for pre/post indexed in spilling_get_operation_type_ref }
                  result:=operand_read;
                  result:=operand_read;
              end;
              end;
+{$ifdef EXTDEBUG}
+           { play it safe to avoid hard-to-find bugs: better to fail at compile time }
+           A_ADD,
+           A_ADRP,
+           A_AND,
+           A_ASR,
+           A_BFI,
+           A_BFXIL,
+           A_CLZ,
+           A_CSEL,
+           A_CSET,
+           A_CSETM,
+           A_FABS,
+           A_EON,
+           A_EOR,
+           A_FADD,
+           A_FCVT,
+           A_FDIV,
+           A_FMADD,
+           A_FMOV,
+           A_FMSUB,
+           A_FMUL,
+           A_FNEG,
+           A_FNMADD,
+           A_FNMSUB,
+           A_FRINTX,
+           A_FSQRT,
+           A_FSUB,
+           A_ORR,
+           A_LSL,
+           A_LSLV,
+           A_LSR,
+           A_LSRV,
+           A_MOV,
+           A_MOVK,
+           A_MOVN,
+           A_MOVZ,
+           A_MSUB,
+           A_MUL,
+           A_MVN,
+           A_NEG,
+           A_LDR,
+           A_LDUR,
+           A_RBIT,
+           A_ROR,
+           A_RORV,
+           A_SBFX,
+           A_SCVTF,
+           A_FCVTZS,
+           A_SDIV,
+           A_SMULL,
+           A_SUB,
+           A_SXT,
+           A_UBFIZ,
+           A_UBFX,
+           A_UCVTF,
+           A_UDIV,
+           A_UMULL,
+           A_UXT:
+             if opnr=0 then
+               result:=operand_write
+             else
+               result:=operand_read;
+           else
+             Internalerror(2019090802);
+{$else EXTDEBUG}
            else
            else
              if opnr=0 then
              if opnr=0 then
                result:=operand_write
                result:=operand_write
              else
              else
                result:=operand_read;
                result:=operand_read;
+{$endif EXTDEBUG}
         end;
         end;
       end;
       end;
 
 
@@ -922,8 +1016,8 @@ implementation
 
 
 
 
     procedure BuildInsTabCache;
     procedure BuildInsTabCache;
-      var
-        i : longint;
+//      var
+//        i : longint;
       begin
       begin
 (*        new(instabcache);
 (*        new(instabcache);
         FillChar(instabcache^,sizeof(tinstabcache),$ff);
         FillChar(instabcache^,sizeof(tinstabcache),$ff);
@@ -1006,6 +1100,7 @@ implementation
 *)
 *)
 
 
     procedure insertpcrelativedata(list,listtoinsert : TAsmList);
     procedure insertpcrelativedata(list,listtoinsert : TAsmList);
+(*
       var
       var
         curinspos,
         curinspos,
         penalty,
         penalty,
@@ -1021,6 +1116,7 @@ implementation
         l : tasmlabel;
         l : tasmlabel;
         doinsert,
         doinsert,
         removeref : boolean;
         removeref : boolean;
+*)
       begin
       begin
 (*
 (*
         curdata:=TAsmList.create;
         curdata:=TAsmList.create;

+ 15 - 4
compiler/aarch64/agcpugas.pas

@@ -50,7 +50,7 @@ unit agcpugas;
 
 
     const
     const
       gas_shiftmode2str : array[tshiftmode] of string[4] = (
       gas_shiftmode2str : array[tshiftmode] of string[4] = (
-        '','lsl','lsr','asr',
+        '','lsl','lsr','asr','ror',
         'uxtb','uxth','uxtw','uxtx',
         'uxtb','uxth','uxtw','uxtx',
         'sxtb','sxth','sxtw','sxtx');
         'sxtb','sxth','sxtw','sxtx');
 
 
@@ -119,9 +119,13 @@ unit agcpugas;
                     result:=ref.symbol.name+darwin_addrpage2str[ref.refaddr]
                     result:=ref.symbol.name+darwin_addrpage2str[ref.refaddr]
                   else
                   else
                     result:=linux_addrpage2str[ref.refaddr]+ref.symbol.name
                     result:=linux_addrpage2str[ref.refaddr]+ref.symbol.name
-                end
+                end;
+              addr_pic,
+              { for locals replaced by temp symbols on LLVM }
+              addr_no:
+                result:=ref.symbol.name;
               else
               else
-                internalerror(2015022301);
+                internalerror(2015022302);
             end
             end
           end
           end
         else
         else
@@ -180,6 +184,8 @@ unit agcpugas;
                 result:=result+']';
                 result:=result+']';
               AM_PREINDEXED:
               AM_PREINDEXED:
                 result:=result+']!';
                 result:=result+']!';
+              else
+                ;
             end;
             end;
           end;
           end;
       end;
       end;
@@ -234,6 +240,11 @@ unit agcpugas;
               end
               end
             else
             else
               getopstr:=getreferencestring(asminfo,o.ref^);
               getopstr:=getreferencestring(asminfo,o.ref^);
+          top_realconst:
+            begin
+              str(o.val_real,Result);
+              Result:='#'+Result;
+            end
           else
           else
             internalerror(2014121507);
             internalerror(2014121507);
         end;
         end;
@@ -274,7 +285,7 @@ unit agcpugas;
             idtxt  : 'AS';
             idtxt  : 'AS';
             asmbin : 'as';
             asmbin : 'as';
             asmcmd : '-o $OBJ $EXTRAOPT $ASM';
             asmcmd : '-o $OBJ $EXTRAOPT $ASM';
-            supported_targets : [system_aarch64_linux];
+            supported_targets : [system_aarch64_linux,system_aarch64_android];
             flags : [af_needar,af_smartlink_sections];
             flags : [af_needar,af_smartlink_sections];
             labelprefix : '.L';
             labelprefix : '.L';
             comment : '// ';
             comment : '// ';

+ 513 - 12
compiler/aarch64/aoptcpu.pas

@@ -21,26 +21,54 @@
  ****************************************************************************
  ****************************************************************************
 }
 }
 
 
-
 Unit aoptcpu;
 Unit aoptcpu;
 
 
 {$i fpcdefs.inc}
 {$i fpcdefs.inc}
 
 
+{ $define DEBUG_AOPTCPU}
+
 Interface
 Interface
 
 
-uses cpubase, aasmtai, aopt, aoptcpub;
+    uses
+      globtype, globals,
+      cutils,
+      cgbase, cpubase, aasmtai, aasmcpu, aopt, aoptcpub;
 
 
-Type
-  TCpuAsmOptimizer = class(TAsmOptimizer)
-    { uses the same constructor as TAopObj }
-    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
-    procedure PeepHoleOptPass2;override;
-  End;
+    Type
+      TCpuAsmOptimizer = class(TAsmOptimizer)
+        { uses the same constructor as TAopObj }
+        function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
+        function PostPeepHoleOptsCpu(var p: tai): boolean; override;
+        function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;override;
+        function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;override;
+        function GetNextInstructionUsingReg(Current : tai; out Next : tai; reg : TRegister) : Boolean;
+        function LookForPostindexedPattern(p : taicpu) : boolean;
+        procedure DebugMsg(const s : string; p : tai);
+      private
+        function OptPass1Shift(var p: tai): boolean;
+        function OptPostCMP(var p: tai): boolean;
+        function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
+        function OptPass1Data(var p: tai): boolean;
+      End;
 
 
 Implementation
 Implementation
 
 
   uses
   uses
-    aasmbase,aasmcpu,cgbase;
+    aasmbase,
+    aoptutils,
+    cgutils,
+    verbose;
+
+{$ifdef DEBUG_AOPTCPU}
+  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai); { DEBUG_AOPTCPU build: inserts s as an assembler comment in front of p }
+    begin
+      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
+    end;
+{$else DEBUG_AOPTCPU}
+  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline; { regular build: intentionally empty, so the calls cost nothing }
+    begin
+    end;
+{$endif DEBUG_AOPTCPU}
 
 
   function CanBeCond(p : tai) : boolean;
   function CanBeCond(p : tai) : boolean;
     begin
     begin
@@ -48,16 +76,489 @@ Implementation
     end;
     end;
 
 
 
 
-  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
+  function RefsEqual(const r1, r2: treference): boolean;
+    begin
+      { Two references are equal when neither of them is volatile and all
+        addressing fields compared below are identical. }
+      Result:=(r1.volatility=[]) and (r2.volatility=[]);
+      if not Result then
+        exit;
+      { registers and scaling }
+      Result:=
+        (r1.base=r2.base) and
+        (r1.index=r2.index) and
+        (r1.scalefactor=r2.scalefactor) and
+        (r1.shiftmode=r2.shiftmode) and
+        (r1.shiftimm=r2.shiftimm) and
+        (r1.addressmode=r2.addressmode);
+      if not Result then
+        exit;
+      { symbolic part and displacement }
+      Result:=
+        (r1.symbol=r2.symbol) and
+        (r1.relsymbol=r2.relsymbol) and
+        (r1.refaddr=r2.refaddr) and
+        (r1.offset=r2.offset);
+    end;
+
+
+  function MatchInstruction(const instr: tai; const op: TAsmOps; const postfix: TOpPostfixes): boolean;
+    begin
+      { instr must be an instruction; an empty opcode set or an empty postfix
+        set acts as a wildcard for the respective property }
+      Result:=false;
+      if instr.typ<>ait_instruction then
+        exit;
+      if (op<>[]) and not(taicpu(instr).opcode in op) then
+        exit;
+      Result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
+    end;
+
+
+  function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
+    begin
+      { instr must be an instruction with exactly opcode op; an empty postfix
+        set accepts every postfix }
+      Result:=false;
+      if instr.typ<>ait_instruction then
+        exit;
+      if taicpu(instr).opcode<>op then
+        exit;
+      Result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
+    end;
+
+
+  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
+    begin
+      { true only for a register operand holding exactly reg }
+      Result:=false;
+      if oper.typ=top_reg then
+        Result:=oper.reg=reg;
+    end;
+
+
+  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
+    begin
+      { operands of different kinds never match; operand kinds not listed
+        below are treated as unequal }
+      Result:=false;
+      if oper1.typ<>oper2.typ then
+        exit;
+
+      case oper1.typ of
+        top_reg:
+          Result:=oper1.reg=oper2.reg;
+        top_const:
+          Result:=oper1.val=oper2.val;
+        top_realconst:
+          Result:=oper1.val_real=oper2.val_real;
+        top_conditioncode:
+          Result:=oper1.cc=oper2.cc;
+        top_ref:
+          Result:=RefsEqual(oper1.ref^,oper2.ref^);
+        else
+          ;
+      end;
+    end;
+
+
+  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
+    Out Next: tai; reg: TRegister): Boolean;
+    begin
+      { Walks forward from Current. Without -O3 only the directly following
+        instruction is returned; with -O3 the walk continues until it reaches
+        something that is not an instruction, an instruction mentioning reg,
+        or a call/jump. Result tells whether Next is valid. }
+      Next:=Current;
+      repeat
+        Result:=GetNextInstruction(Next,Next);
+        if not Result then
+          break;
+        if not(cs_opt_level3 in current_settings.optimizerswitches) then
+          break;
+        if Next.typ<>ait_instruction then
+          break;
+        if RegInInstruction(reg,Next) then
+          break;
+        if is_calljmp(taicpu(Next).opcode) then
+          break;
+      until false;
+    end;
+
+
+  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
+    var
+      p: taicpu;
+    begin
+      { Returns true when instruction hp overwrites reg with a new value, i.e.
+        the previous contents of reg are no longer needed once hp has executed.
+        hp may be nil or a non-instruction; the answer is then false.
+        Answering false merely prevents an optimization, so every doubtful
+        case is answered with false. }
+      Result := false;
+      if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
+        exit;
+      p := taicpu(hp);
+
+      { instructions emitted without any operand have no oper[0] to inspect }
+      if p.ops = 0 then
+        exit;
+
+      case p.opcode of
+        { These instructions only read their operands. The list is kept in
+          line with the read-only opcodes of taicpu.spilling_get_operation_type.
+          Stores are listed too: they do not load into their first register,
+          and a pre-/postindexed access only modifies its base register, it
+          does not load it with a new value. Loads need no special handling,
+          they are covered by the oper[0] check below. }
+        A_CMP, A_CMN, A_CCMP, A_CCMN, A_TST,
+        A_FCMP, A_FCMPE,
+        A_B, A_BL, A_BR, A_BLR,
+        A_CBZ, A_CBNZ,
+        A_RET,
+        A_MSR,
+        A_STR, A_STUR:
+          exit;
+        else
+          ;
+      end;
+
+      case p.oper[0]^.typ of
+        top_reg:
+          Result := (p.oper[0]^.reg = reg);
+        top_ref:
+          Result :=
+            (p.oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
+            (p.oper[0]^.ref^.base = reg);
+        else
+          ;
+      end;
+    end;
+
+
+  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
+    var
+      instr: taicpu;
+      first,
+      idx: longint;
+    begin
+      { Reports whether hp reads reg, either directly as a register operand or
+        as base/index register of a memory reference. }
+      Result:=false;
+      if not assigned(hp) or (hp.typ<>ait_instruction) then
+        exit;
+      instr:=taicpu(hp);
+
+      { operand 0 is only scanned when the instruction reads it }
+      if instr.spilling_get_operation_type(0) in [operand_read, operand_readwrite] then
+        first:=0
+      else
+        first:=1;
+
+      for idx:=first to instr.ops-1 do
+        begin
+          case instr.oper[idx]^.typ of
+            top_reg:
+              Result:=instr.oper[idx]^.reg=reg;
+            top_ref:
+              Result:=
+                (instr.oper[idx]^.ref^.base=reg) or
+                (instr.oper[idx]^.ref^.index=reg);
+            else
+              ;
+          end;
+          { stop at the first hit }
+          if Result then
+            exit;
+        end;
+    end;
+
+
+  function TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean; { Folds "p: <op> regA,...; movp: mov regB,regA" into
+    "<op> regB,..." when regA is released after movp: p is rewritten to write regB directly and movp is
+    removed and freed. optimizer is only used for the debug message. Returns true when the fold was done. }
+    var
+      alloc,
+      dealloc : tai_regalloc;
+      hp1 : tai;
+    begin
+      Result:=false;
+      if MatchInstruction(movp, A_MOV, [PF_None]) and
+        (taicpu(p).ops>=3) and
+        { We can't optimize if there is a shiftop }
+        (taicpu(movp).ops=2) and
+        MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
+        { don't mess with moves to fp }
+        (taicpu(movp).oper[0]^.reg<>NR_FP) and
+        { the destination register of the mov must not be used between p and movp }
+        not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
+        { Take care to only do this for instructions which REALLY load to the first register.
+          Otherwise
+            str reg0, [reg1]
+            mov reg2, reg0
+          will be optimized to
+            str reg2, [reg1]
+        }
+        RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
+        begin
+          { the fold is only done when a deallocation of p's result register is found after movp }
+          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
+          if assigned(dealloc) then
+            begin
+              DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
+              result:=true;
+
+              { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
+                and remove it if possible; if no allocation is found, the deallocation
+                is re-inserted directly after p instead }
+              asml.Remove(dealloc);
+              alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
+              if assigned(alloc) then
+                begin
+                  asml.Remove(alloc);
+                  alloc.free;
+                  dealloc.free;
+                end
+              else
+                asml.InsertAfter(dealloc,p);
+
+              { try to move the allocation of the target register in front of p }
+              GetLastInstruction(movp,hp1);
+              alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
+              if assigned(alloc) then
+                begin
+                  asml.Remove(alloc);
+                  asml.InsertBefore(alloc,p);
+                  { adjust used regs }
+                  IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
+                end;
+
+              { finally get rid of the mov: p now writes the mov's destination itself }
+              taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
+              { Remove preindexing and postindexing for LDR in some cases.
+                For example:
+                  ldr reg2,[reg1, xxx]!
+                  mov reg1,reg2
+                must be translated to:
+                  ldr reg1,[reg1, xxx]
+
+                Preindexing must be removed there, since the same register is used as the base and as the target.
+                Such a case is not allowed for the ARM CPU and produces a crash. }
+              if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref)
+                and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base)
+              then
+                taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;
+              asml.remove(movp);
+              movp.free;
+            end;
+        end;
+    end;
+
+
+  {
+    optimize
+      ldr/str regX,[reg1]
+      ...
+      add/sub reg1,reg1,regY/const
+
+      into
+
+      ldr/str regX,[reg1], regY/const
+  }
+  function TCpuAsmOptimizer.LookForPostindexedPattern(p: taicpu) : boolean;
+    var
+      hp1 : tai;
+    begin
+      { p is a load/store whose operand 1 is inspected: if it is a plain
+        "[base]" reference and the next instruction using base is
+        "add/sub base,base,#const", the add/sub is merged into p as a
+        postindexed access and removed. Returns true when the merge was done. }
+      Result:=false;
+      if (p.oper[1]^.typ = top_ref) and
+        (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
+        (p.oper[1]^.ref^.index=NR_NO) and
+        (p.oper[1]^.ref^.offset=0) and
+        GetNextInstructionUsingReg(p, hp1, p.oper[1]^.ref^.base) and
+        { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
+        MatchInstruction(hp1, [A_ADD, A_SUB], [PF_None]) and
+        (taicpu(hp1).oper[0]^.reg=p.oper[1]^.ref^.base) and
+        (taicpu(hp1).oper[1]^.reg=p.oper[1]^.ref^.base) and
+        (
+         { valid offset? Only -255..255 passes, so the value stays inside
+           the accepted range when it is negated for SUB below }
+         (taicpu(hp1).oper[2]^.typ=top_const) and
+         (taicpu(hp1).oper[2]^.val>=-256) and
+         (abs(taicpu(hp1).oper[2]^.val)<256)
+        ) and
+        { don't apply the optimization if the base register is loaded }
+        (getsupreg(p.oper[0]^.reg)<>getsupreg(p.oper[1]^.ref^.base)) and
+        { the base register must keep its value between p and the add/sub.
+          Operand 2 of the add/sub is a constant (checked above), so there is
+          no second register whose modification would have to be ruled out }
+        not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
+          p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
+          { SUB subtracts the constant, so the postindex offset is its negation }
+          if taicpu(hp1).opcode=A_ADD then
+            p.oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
+          else
+            p.oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
+          asml.Remove(hp1);
+          hp1.Free;
+          Result:=true;
+        end;
+    end;
+
+
+  function TCpuAsmOptimizer.OptPass1Shift(var p : tai): boolean;
     var
     var
-      next1: tai;
+      hp1,hp2: tai;
+      I: Integer;
+      shifterop: tshifterop;
+    begin
+      { Peephole pass for LSL/LSR/ASR/ROR. On success p is replaced by the
+        instruction to continue with and true is returned. }
+      Result:=false;
+      { This folds shifterops into following instructions
+        <shiftop> r0, r1, #imm
+        <op> r2, r3, r0
+
+        to
+
+        <op> r2, r3, r1, <shiftop> #imm
+      }
+      { do not handle ROR yet, only part of the instructions below support ROR as shifter operand }
+      if MatchInstruction(p,[A_LSL, A_LSR, A_ASR{, A_ROR}],[PF_None]) and
+         MatchOpType(taicpu(p),top_reg,top_reg,top_const) and
+         GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+         MatchInstruction(hp1, [A_ADD, A_AND, A_BIC, A_CMP, A_CMN,
+                                A_EON, A_EOR, A_NEG, A_ORN, A_ORR,
+                                A_SUB, A_TST], [PF_None]) and
+         RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
+         (taicpu(hp1).ops >= 2) and
+         { Currently we can't fold into another shifterop }
+         (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
+         { SP does not work completely with shifted registers, as I didn't find the exact rules,
+           we do not operate on SP }
+         (taicpu(hp1).oper[0]^.reg<>NR_SP) and
+         (taicpu(hp1).oper[1]^.reg<>NR_SP) and
+         (taicpu(hp1).oper[taicpu(hp1).ops-1]^.reg<>NR_SP) and
+         { reg1 must not be modified in between }
+         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
+         (
+           { Only ONE of the two src operands is allowed to match }
+           MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
+           MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
+         ) and
+         { Only the last operand can carry the shift, so a match on the first
+           source operand requires swapping the sources. SUB (there is no RSB
+           on AArch64), BIC and ORN are not commutative, for them the last
+           operand must match }
+         (not(taicpu(hp1).opcode in [A_SUB, A_BIC, A_ORN]) or
+          MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)) then
+        begin
+          { for the two operand instructions, start also at the second operand as they are not always commutative
+            (depends on the flags tested later on) and thus the operands cannot be swapped }
+          for I:=1 to taicpu(hp1).ops-1 do
+            if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
+              begin
+                { If the parameter matched on the second op from the RIGHT
+                  we have to switch the parameters, this will not happen for CMP
+                  where we're only evaluating the rightmost parameter
+                }
+                shifterop_reset(shifterop);
+                case taicpu(p).opcode of
+                  A_LSL:
+                    shifterop.shiftmode:=SM_LSL;
+                  A_ROR:
+                    shifterop.shiftmode:=SM_ROR;
+                  A_LSR:
+                    shifterop.shiftmode:=SM_LSR;
+                  A_ASR:
+                    shifterop.shiftmode:=SM_ASR;
+                  else
+                    InternalError(2019090401);
+                end;
+                shifterop.shiftimm:=taicpu(p).oper[2]^.val;
+
+                if I <> taicpu(hp1).ops-1 then
+                  begin
+                    { match on the first source operand: swap the sources }
+                    if taicpu(hp1).ops = 3 then
+                      hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
+                           taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
+                           taicpu(p).oper[1]^.reg, shifterop)
+                    else
+                      hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
+                           taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
+                           shifterop);
+                  end
+                else
+                  if taicpu(hp1).ops = 3 then
+                    hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
+                         taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
+                         taicpu(p).oper[1]^.reg,shifterop)
+                  else
+                    hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
+                         taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
+                         shifterop);
+
+                taicpu(hp2).fileinfo:=taicpu(hp1).fileinfo;
+                asml.insertbefore(hp2, hp1);
+                { continue with the instruction following the removed shift }
+                GetNextInstruction(p, hp2);
+                asml.remove(p);
+                asml.remove(hp1);
+                p.free;
+                hp1.free;
+                p:=hp2;
+                DebugMsg('Peephole FoldShiftProcess done', p);
+                Result:=true;
+                break;
+              end;
+        end
+      else if MatchInstruction(p,[A_LSL, A_LSR, A_ASR,A_ROR],[PF_None]) and
+        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+        RemoveSuperfluousMove(p, hp1, 'ShiftMov2Shift') then
+        Result:=true;
+    end;
+
+
+  function TCpuAsmOptimizer.OptPass1Data(var p : tai): boolean;
+    var
+      nextuse: tai;
+    begin
+      { Data processing instruction followed by a mov of its result:
+        let RemoveSuperfluousMove try to merge the two. }
+      Result:=false;
+      if not GetNextInstructionUsingReg(p, nextuse, taicpu(p).oper[0]^.reg) then
+        exit;
+      if RemoveSuperfluousMove(p, nextuse, 'DataMov2Data') then
+        Result:=true;
+    end;
+
+
+  function TCpuAsmOptimizer.OptPostCMP(var p : tai): boolean;
+    var
+     branch,fused: tai;
+     fusedop: TAsmOp;
+    begin
+      { cmp reg,#0 directly followed by b.eq/b.ne label is replaced by a
+        single cbz/cbnz reg,label; p then points to the new instruction }
+      Result:=false;
+      if not MatchOpType(taicpu(p),top_reg,top_const) then
+        exit;
+      if taicpu(p).oper[1]^.val<>0 then
+        exit;
+      if not GetNextInstruction(p,branch) then
+        exit;
+      if not MatchInstruction(branch,A_B,[PF_None]) then
+        exit;
+      if not(taicpu(branch).condition in [C_EQ,C_NE]) then
+        exit;
+
+      case taicpu(branch).condition of
+        C_EQ:
+          fusedop:=A_CBZ;
+        C_NE:
+          fusedop:=A_CBNZ;
+        else
+          Internalerror(2019090801);
+      end;
+      fused:=taicpu.op_reg_sym_ofs(fusedop,taicpu(p).oper[0]^.reg,
+        taicpu(branch).oper[0]^.ref^.symbol,taicpu(branch).oper[0]^.ref^.offset);
+      taicpu(fused).fileinfo:=taicpu(branch).fileinfo;
+      asml.insertbefore(fused, branch);
+
+      { drop the compare and the conditional branch }
+      asml.remove(p);
+      asml.remove(branch);
+      p.free;
+      branch.free;
+      p:=fused;
+      DebugMsg('Peephole CMPB.E/NE2CBNZ/CBZ done', p);
+      Result:=true;
+    end;
+
+
+  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean; { pass 1 dispatcher: picks the peephole routine by the opcode of p; result is what that routine returns, false otherwise }
     begin
     begin
       result := false;
       result := false;
+      if p.typ=ait_instruction then
+        begin
+          case taicpu(p).opcode of
+            A_LDR: { loads and stores: try to merge a following add/sub of the base register }
+              begin
+                Result:=LookForPostindexedPattern(taicpu(p));
+              end;
+            A_STR:
+              begin
+                Result:=LookForPostindexedPattern(taicpu(p));
+              end;
+            A_LSR, { shifts: fold into the next user or drop a following mov }
+            A_ROR,
+            A_ASR,
+            A_LSL:
+              Result:=OptPass1Shift(p);
+            A_ADD, { data processing: drop a following mov of the result }
+            A_ADC,
+            A_SUB,
+            A_SBC,
+            A_AND,
+            A_BIC,
+            A_EOR,
+            A_ORR,
+            A_MUL:
+              Result:=OptPass1Data(p);
+            else
+              ;
+          end;
+        end;
     end;
     end;
 
 
 
 
-  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
+  function TCpuAsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean; { post peephole dispatcher: only CMP is handled here; result is what OptPostCMP returns, false otherwise }
     begin
     begin
+      result := false;
+      if p.typ=ait_instruction then
+        begin
+          case taicpu(p).opcode of
+            A_CMP: { cmp reg,#0 + b.eq/b.ne -> cbz/cbnz }
+              Result:=OptPostCMP(p);
+            else
+              ;
+          end;
+        end;
     end;
     end;
 
 
 begin
 begin

+ 2 - 4
compiler/aarch64/aoptcpub.pas

@@ -76,10 +76,6 @@ Const
 
 
   MaxCh = 3;
   MaxCh = 3;
 
 
-{ the maximum number of operands an instruction has }
-
-  MaxOps = 4;
-
 {Oper index of operand that contains the source (reference) with a load }
 {Oper index of operand that contains the source (reference) with a load }
 {instruction                                                            }
 {instruction                                                            }
 
 
@@ -146,6 +142,8 @@ Implementation
                   exit
                   exit
                 end;
                 end;
             end;
             end;
+          else
+            ;
         end;
         end;
     end;
     end;
 
 

+ 163 - 53
compiler/aarch64/cgcpu.pas

@@ -100,6 +100,8 @@ interface
         procedure g_concatcopy_move(list: TAsmList; const source, dest: treference; len: tcgint);
         procedure g_concatcopy_move(list: TAsmList; const source, dest: treference; len: tcgint);
         procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);override;
         procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);override;
         procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint);override;
         procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint);override;
+        procedure g_check_for_fpu_exception(list: TAsmList; force, clear: boolean);override;
+        procedure g_profilecode(list: TAsmList);override;
        private
        private
         function save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
         function save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
         procedure load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
         procedure load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
@@ -185,7 +187,7 @@ implementation
               href.refaddr:=addr_gotpage;
               href.refaddr:=addr_gotpage;
             list.concat(taicpu.op_reg_ref(A_ADRP,preferred_newbasereg,href));
             list.concat(taicpu.op_reg_ref(A_ADRP,preferred_newbasereg,href));
             { load the GOT entry (= address of the variable) }
             { load the GOT entry (= address of the variable) }
-            reference_reset_base(href,preferred_newbasereg,0,sizeof(pint),[]);
+            reference_reset_base(href,preferred_newbasereg,0,ctempposinvalid,sizeof(pint),[]);
             href.symbol:=ref.symbol;
             href.symbol:=ref.symbol;
             { code symbols defined in the current compilation unit do not
             { code symbols defined in the current compilation unit do not
               have to be accessed via the GOT }
               have to be accessed via the GOT }
@@ -245,7 +247,7 @@ implementation
                       so.shiftmode:=ref.shiftmode;
                       so.shiftmode:=ref.shiftmode;
                       so.shiftimm:=ref.shiftimm;
                       so.shiftimm:=ref.shiftimm;
                       list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
                       list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
-                      reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment,ref.volatility);
+                      reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
                       { possibly still an invalid offset -> fall through }
                       { possibly still an invalid offset -> fall through }
                     end
                     end
                   else if ref.offset<>0 then
                   else if ref.offset<>0 then
@@ -291,7 +293,7 @@ implementation
                     end
                     end
                   else
                   else
                     a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,ref.index,ref.base,preferred_newbasereg);
                     a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,ref.index,ref.base,preferred_newbasereg);
-                  reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment,ref.volatility);
+                  reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
                   { fall through to the handling of base + offset, since the
                   { fall through to the handling of base + offset, since the
                     offset may still be too big }
                     offset may still be too big }
                 end;
                 end;
@@ -379,11 +381,9 @@ implementation
                                 a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
                                 a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
                                 ref.offset:=0;
                                 ref.offset:=0;
                               end;
                               end;
-                            reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment,ref.volatility);
+                            reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
                           end;
                           end;
                       end
                       end
-                    else
-                      internalerror(2014110904);
                   end;
                   end;
                 end;
                 end;
               A_LDP,A_STP:
               A_LDP,A_STP:
@@ -407,7 +407,7 @@ implementation
                             preferred_newbasereg:=getaddressregister(list);
                             preferred_newbasereg:=getaddressregister(list);
                       end;
                       end;
                       a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
                       a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
-                      reference_reset_base(ref,preferred_newbasereg,0,ref.alignment,ref.volatility);
+                      reference_reset_base(ref,preferred_newbasereg,0,ref.temppos,ref.alignment,ref.volatility);
                     end
                     end
                 end;
                 end;
               A_LDUR,A_STUR:
               A_LDUR,A_STUR:
@@ -429,7 +429,7 @@ implementation
         if preferred_newbasereg=NR_NO then
         if preferred_newbasereg=NR_NO then
           preferred_newbasereg:=getaddressregister(list);
           preferred_newbasereg:=getaddressregister(list);
         a_load_const_reg(list,OS_ADDR,ref.offset,preferred_newbasereg);
         a_load_const_reg(list,OS_ADDR,ref.offset,preferred_newbasereg);
-        reference_reset_base(ref,preferred_newbasereg,0,newalignment(8,ref.offset),ref.volatility);
+        reference_reset_base(ref,preferred_newbasereg,0,ref.temppos,newalignment(8,ref.offset),ref.volatility);
       end;
       end;
 
 
 
 
@@ -702,7 +702,10 @@ implementation
         hreg: tregister;
         hreg: tregister;
       begin
       begin
         if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
         if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
-          fromsize:=tosize
+          begin
+            fromsize:=tosize;
+            reg:=makeregsize(list,reg,fromsize);
+          end
         { have a 32 bit register but need a 64 bit one? }
         { have a 32 bit register but need a 64 bit one? }
         else if tosize in [OS_64,OS_S64] then
         else if tosize in [OS_64,OS_S64] then
           begin
           begin
@@ -807,35 +810,80 @@ implementation
     procedure tcgaarch64.a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister);
     procedure tcgaarch64.a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister);
       var
       var
         href: treference;
         href: treference;
-        hreg1, hreg2, tmpreg: tregister;
+        hreg1, hreg2, tmpreg,tmpreg2: tregister;
+        i : Integer;
       begin
       begin
-        if fromsize in [OS_64,OS_S64] then
-          begin
-            { split into two 32 bit loads }
-            hreg1:=getintregister(list,OS_32);
-            hreg2:=getintregister(list,OS_32);
-            if target_info.endian=endian_big then
-              begin
-                tmpreg:=hreg1;
-                hreg1:=hreg2;
-                hreg2:=tmpreg;
-              end;
-            { can we use LDP? }
-            if (ref.alignment=4) and
-               (simple_ref_type(A_LDP,OS_32,PF_None,ref)=sr_simple) then
-              list.concat(taicpu.op_reg_reg_ref(A_LDP,hreg1,hreg2,ref))
-            else
-              begin
-                a_load_ref_reg(list,OS_32,OS_32,ref,hreg1);
-                href:=ref;
-                inc(href.offset,4);
-                a_load_ref_reg(list,OS_32,OS_32,href,hreg2);
-              end;
-            a_load_reg_reg(list,OS_32,OS_64,hreg1,register);
-            list.concat(taicpu.op_reg_reg_const_const(A_BFI,register,makeregsize(hreg2,OS_64),32,32));
-          end
-       else
-         inherited;
+        case fromsize of
+          OS_64,OS_S64:
+            begin
+              { split into two 32 bit loads }
+              hreg1:=getintregister(list,OS_32);
+              hreg2:=getintregister(list,OS_32);
+              if target_info.endian=endian_big then
+                begin
+                  tmpreg:=hreg1;
+                  hreg1:=hreg2;
+                  hreg2:=tmpreg;
+                end;
+              { can we use LDP? }
+              if (ref.alignment=4) and
+                 (simple_ref_type(A_LDP,OS_32,PF_None,ref)=sr_simple) then
+                list.concat(taicpu.op_reg_reg_ref(A_LDP,hreg1,hreg2,ref))
+              else
+                begin
+                  a_load_ref_reg(list,OS_32,OS_32,ref,hreg1);
+                  href:=ref;
+                  inc(href.offset,4);
+                  a_load_ref_reg(list,OS_32,OS_32,href,hreg2);
+                end;
+              a_load_reg_reg(list,OS_32,OS_64,hreg1,register);
+              list.concat(taicpu.op_reg_reg_const_const(A_BFI,register,makeregsize(hreg2,OS_64),32,32));
+            end;
+          OS_16,OS_S16,
+          OS_32,OS_S32:
+            begin
+              if ref.alignment=2 then
+                begin
+                  href:=ref;
+                  if target_info.endian=endian_big then
+                    inc(href.offset,tcgsize2size[fromsize]-2);
+                  tmpreg:=getintregister(list,OS_32);
+                  a_load_ref_reg(list,OS_16,OS_32,href,tmpreg);
+                  tmpreg2:=getintregister(list,OS_32);
+                  for i:=1 to (tcgsize2size[fromsize]-1) div 2 do
+                    begin
+                      if target_info.endian=endian_big then
+                        dec(href.offset,2)
+                      else
+                        inc(href.offset,2);
+                      a_load_ref_reg(list,OS_16,OS_32,href,tmpreg2);
+                      list.concat(taicpu.op_reg_reg_const_const(A_BFI,tmpreg,tmpreg2,i*16,16));
+                    end;
+                  a_load_reg_reg(list,fromsize,tosize,tmpreg,register);
+                end
+              else
+                begin
+                  href:=ref;
+                  if target_info.endian=endian_big then
+                    inc(href.offset,tcgsize2size[fromsize]-1);
+                  tmpreg:=getintregister(list,OS_32);
+                  a_load_ref_reg(list,OS_8,OS_32,href,tmpreg);
+                  tmpreg2:=getintregister(list,OS_32);
+                  for i:=1 to tcgsize2size[fromsize]-1 do
+                    begin
+                      if target_info.endian=endian_big then
+                        dec(href.offset)
+                      else
+                        inc(href.offset);
+                      a_load_ref_reg(list,OS_8,OS_32,href,tmpreg2);
+                      list.concat(taicpu.op_reg_reg_const_const(A_BFI,tmpreg,tmpreg2,i*8,8));
+                    end;
+                  a_load_reg_reg(list,fromsize,tosize,tmpreg,register);
+                end;
+            end;
+          else
+            inherited;
+        end;
       end;
       end;
 
 
 
 
@@ -977,6 +1025,7 @@ implementation
             { Notify the register allocator that we have written a move
             { Notify the register allocator that we have written a move
               instruction so it can try to eliminate it. }
               instruction so it can try to eliminate it. }
             add_move_instruction(instr);
             add_move_instruction(instr);
+            { FMOV cannot generate a floating point exception }
           end
           end
         else
         else
           begin
           begin
@@ -984,6 +1033,7 @@ implementation
                (reg_cgsize(reg2)<>tosize) then
                (reg_cgsize(reg2)<>tosize) then
               internalerror(2014110913);
               internalerror(2014110913);
             instr:=taicpu.op_reg_reg(A_FCVT,reg2,reg1);
             instr:=taicpu.op_reg_reg(A_FCVT,reg2,reg1);
+            maybe_check_for_fpu_exception(list);
           end;
           end;
         list.Concat(instr);
         list.Concat(instr);
       end;
       end;
@@ -1037,13 +1087,19 @@ implementation
 
 
 
 
      procedure tcgaarch64.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
      procedure tcgaarch64.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
+       var
+         r : tregister;
        begin
        begin
          if not shufflescalar(shuffle) then
          if not shufflescalar(shuffle) then
            internalerror(2014122802);
            internalerror(2014122802);
          if not(tcgsize2size[fromsize] in [4,8]) or
          if not(tcgsize2size[fromsize] in [4,8]) or
-            (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then
+            (tcgsize2size[fromsize]>tcgsize2size[tosize]) then
            internalerror(2014122804);
            internalerror(2014122804);
-         list.concat(taicpu.op_reg_reg(A_UMOV,intreg,mmreg));
+         if tcgsize2size[fromsize]<tcgsize2size[tosize] then
+           r:=makeregsize(intreg,fromsize)
+         else
+           r:=intreg;
+         list.concat(taicpu.op_reg_reg(A_UMOV,r,mmreg));
        end;
        end;
 
 
 
 
@@ -1073,18 +1129,15 @@ implementation
 
 
     procedure tcgaarch64.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
     procedure tcgaarch64.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
       var
       var
-        bitsize,
-        signbit: longint;
+        bitsize: longint;
       begin
       begin
         if srcsize in [OS_64,OS_S64] then
         if srcsize in [OS_64,OS_S64] then
           begin
           begin
             bitsize:=64;
             bitsize:=64;
-            signbit:=6;
           end
           end
         else
         else
           begin
           begin
             bitsize:=32;
             bitsize:=32;
-            signbit:=5;
           end;
           end;
         { source is 0 -> dst will have to become 255 }
         { source is 0 -> dst will have to become 255 }
         list.concat(taicpu.op_reg_const(A_CMP,src,0));
         list.concat(taicpu.op_reg_const(A_CMP,src,0));
@@ -1254,6 +1307,8 @@ implementation
               a_load_const_reg(list,size,a,dst);
               a_load_const_reg(list,size,a,dst);
               exit;
               exit;
             end;
             end;
+          else
+            ;
         end;
         end;
         case op of
         case op of
           OP_ADD,
           OP_ADD,
@@ -1402,6 +1457,8 @@ implementation
                     check for overflow) }
                     check for overflow) }
                   internalerror(2014122101);
                   internalerror(2014122101);
                 end;
                 end;
+              else
+                internalerror(2019050936);
             end;
             end;
           end;
           end;
         a_op_reg_reg_reg(list,op,size,src1,src2,dst);
         a_op_reg_reg_reg(list,op,size,src1,src2,dst);
@@ -1522,7 +1579,7 @@ implementation
         pairreg: tregister;
         pairreg: tregister;
       begin
       begin
         result:=0;
         result:=0;
-        reference_reset_base(ref,NR_SP,-16,16,[]);
+        reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
         ref.addressmode:=AM_PREINDEXED;
         ref.addressmode:=AM_PREINDEXED;
         pairreg:=NR_NO;
         pairreg:=NR_NO;
         { store all used registers pairwise }
         { store all used registers pairwise }
@@ -1570,7 +1627,7 @@ implementation
         localsize:=align(localsize,16);
         localsize:=align(localsize,16);
 
 
         { save stack pointer and return address }
         { save stack pointer and return address }
-        reference_reset_base(ref,NR_SP,-16,16,[]);
+        reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
         ref.addressmode:=AM_PREINDEXED;
         ref.addressmode:=AM_PREINDEXED;
         list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
         list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
         { initialise frame pointer }
         { initialise frame pointer }
@@ -1646,7 +1703,7 @@ implementation
         pairreg: tregister;
         pairreg: tregister;
         regcount: longint;
         regcount: longint;
       begin
       begin
-        reference_reset_base(ref,NR_SP,16,16,[]);
+        reference_reset_base(ref,NR_SP,16,ctempposinvalid,16,[]);
         ref.addressmode:=AM_POSTINDEXED;
         ref.addressmode:=AM_POSTINDEXED;
         { highest reg stored twice? }
         { highest reg stored twice? }
         regcount:=0;
         regcount:=0;
@@ -1686,7 +1743,14 @@ implementation
         regsstored: boolean;
         regsstored: boolean;
         sr: tsuperregister;
         sr: tsuperregister;
       begin
       begin
-        if not nostackframe then
+        if not(nostackframe) and
+          { we do not need an exit stack frame when we never return
+
+            * the final ret is left so the peephole optimizer can easily do call/ret -> jmp or call conversions
+            * the entry stack frame must be normally generated because the subroutine could be still left by
+              an exception and then the unwinding code might need to restore the registers stored by the entry code
+          }
+          not(po_noreturn in current_procinfo.procdef.procoptions) then
           begin
           begin
             { if no registers have been stored, we don't have to subtract the
             { if no registers have been stored, we don't have to subtract the
               allocated temp space from the stack pointer }
               allocated temp space from the stack pointer }
@@ -1717,7 +1781,7 @@ implementation
               a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_FP,NR_SP);
               a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_FP,NR_SP);
 
 
             { restore framepointer and return address }
             { restore framepointer and return address }
-            reference_reset_base(ref,NR_SP,16,16,[]);
+            reference_reset_base(ref,NR_SP,16,ctempposinvalid,16,[]);
             ref.addressmode:=AM_POSTINDEXED;
             ref.addressmode:=AM_POSTINDEXED;
             list.concat(taicpu.op_reg_reg_ref(A_LDP,NR_FP,NR_LR,ref));
             list.concat(taicpu.op_reg_reg_ref(A_LDP,NR_FP,NR_LR,ref));
           end;
           end;
@@ -1744,9 +1808,9 @@ implementation
         paraloc1.init;
         paraloc1.init;
         paraloc2.init;
         paraloc2.init;
         paraloc3.init;
         paraloc3.init;
-        paramanager.getintparaloc(list,pd,1,paraloc1);
-        paramanager.getintparaloc(list,pd,2,paraloc2);
-        paramanager.getintparaloc(list,pd,3,paraloc3);
+        paramanager.getcgtempparaloc(list,pd,1,paraloc1);
+        paramanager.getcgtempparaloc(list,pd,2,paraloc2);
+        paramanager.getcgtempparaloc(list,pd,3,paraloc3);
         a_load_const_cgpara(list,OS_SINT,len,paraloc3);
         a_load_const_cgpara(list,OS_SINT,len,paraloc3);
         a_loadaddr_ref_cgpara(list,dest,paraloc2);
         a_loadaddr_ref_cgpara(list,dest,paraloc2);
         a_loadaddr_ref_cgpara(list,source,paraloc1);
         a_loadaddr_ref_cgpara(list,source,paraloc1);
@@ -1918,12 +1982,12 @@ implementation
           basereplaced:=true;
           basereplaced:=true;
           if forcepostindexing then
           if forcepostindexing then
             begin
             begin
-              reference_reset_base(ref,tmpreg,scaledoffset,ref.alignment,ref.volatility);
+              reference_reset_base(ref,tmpreg,scaledoffset,ref.temppos,ref.alignment,ref.volatility);
               ref.addressmode:=AM_POSTINDEXED;
               ref.addressmode:=AM_POSTINDEXED;
             end
             end
           else
           else
             begin
             begin
-              reference_reset_base(ref,tmpreg,0,ref.alignment,ref.volatility);
+              reference_reset_base(ref,tmpreg,0,ref.temppos,ref.alignment,ref.volatility);
               ref.addressmode:=AM_OFFSET;
               ref.addressmode:=AM_OFFSET;
             end
             end
         end;
         end;
@@ -2203,6 +2267,52 @@ implementation
       end;
       end;
 
 
 
 
+    procedure tcgaarch64.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
+      var
+        r : TRegister;
+        ai: taicpu;
+        l1,l2: TAsmLabel;
+      begin
+        { so far, we assume all flavours of AArch64 need explicit floating point exception checking }
+        if ((cs_check_fpu_exceptions in current_settings.localswitches) and
+            (force or current_procinfo.FPUExceptionCheckNeeded)) then
+          begin
+            r:=getintregister(list,OS_INT);
+            list.concat(taicpu.op_reg_reg(A_MRS,r,NR_FPSR));
+            list.concat(taicpu.op_reg_const(A_TST,r,$1f));
+            current_asmdata.getjumplabel(l1);
+            current_asmdata.getjumplabel(l2);
+            ai:=taicpu.op_sym(A_B,l1);
+            ai.is_jmp:=true;
+            ai.condition:=C_NE;
+            list.concat(ai);
+            list.concat(taicpu.op_reg_const(A_TST,r,$80));
+            ai:=taicpu.op_sym(A_B,l2);
+            ai.is_jmp:=true;
+            ai.condition:=C_EQ;
+            list.concat(ai);
+            a_label(list,l1);
+            alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+            cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
+            dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+            a_label(list,l2);
+            if clear then
+              current_procinfo.FPUExceptionCheckNeeded:=false;
+          end;
+      end;
+
+
+    procedure tcgaarch64.g_profilecode(list : TAsmList);
+      begin
+        if target_info.system = system_aarch64_linux then
+          begin
+            list.concat(taicpu.op_reg_reg(A_MOV,NR_X0,NR_X30));
+            a_call_name(list,'_mcount',false);
+          end
+        else
+          internalerror(2020021901);
+      end;
+
 
 
     procedure create_codegen;
     procedure create_codegen;
       begin
       begin

+ 76 - 24
compiler/aarch64/cpubase.pas

@@ -48,6 +48,8 @@ unit cpubase;
     type
     type
       TAsmOp= {$i a64op.inc}
       TAsmOp= {$i a64op.inc}
 
 
+      TAsmOps = set of TAsmOp;
+
       { This should define the array of instructions as string }
       { This should define the array of instructions as string }
       op2strtable=array[tasmop] of string[11];
       op2strtable=array[tasmop] of string[11];
 
 
@@ -104,7 +106,7 @@ unit cpubase;
       std_param_align = 8;
       std_param_align = 8;
 
 
       { TODO: Calculate bsstart}
       { TODO: Calculate bsstart}
-      regnumber_count_bsstart = 128;
+      regnumber_count_bsstart = 256;
 
 
       regnumber_table : array[tregisterindex] of tregister = (
       regnumber_table : array[tregisterindex] of tregister = (
         {$i ra64num.inc}
         {$i ra64num.inc}
@@ -121,9 +123,6 @@ unit cpubase;
       VOLATILE_INTREGISTERS = [RS_X0..RS_X18,RS_X30];
       VOLATILE_INTREGISTERS = [RS_X0..RS_X18,RS_X30];
       VOLATILE_MMREGISTERS =  [RS_D0..RS_D7,RS_D16..RS_D31];
       VOLATILE_MMREGISTERS =  [RS_D0..RS_D7,RS_D16..RS_D31];
 
 
-    type
-      totherregisterset = set of tregisterindex;
-
 {*****************************************************************************
 {*****************************************************************************
                           Instruction post fixes
                           Instruction post fixes
 *****************************************************************************}
 *****************************************************************************}
@@ -200,7 +199,7 @@ unit cpubase;
       tshiftmode = (SM_None,
       tshiftmode = (SM_None,
                     { shifted register instructions. LSL can also be used for
                     { shifted register instructions. LSL can also be used for
                       the index register of certain loads/stores }
                       the index register of certain loads/stores }
-                    SM_LSL,SM_LSR,SM_ASR,
+                    SM_LSL,SM_LSR,SM_ASR,SM_ROR,
                     { extended register instructions: zero/sign extension +
                     { extended register instructions: zero/sign extension +
                         optional shift (interpreted as LSL after extension)
                         optional shift (interpreted as LSL after extension)
                        -- the index register of certain loads/stores can be
                        -- the index register of certain loads/stores can be
@@ -305,25 +304,6 @@ unit cpubase;
       NR_DEFAULTFLAGS = NR_NZCV;
       NR_DEFAULTFLAGS = NR_NZCV;
       RS_DEFAULTFLAGS = RS_NZCV;
       RS_DEFAULTFLAGS = RS_NZCV;
 
 
-{*****************************************************************************
-                       GCC /ABI linking information
-*****************************************************************************}
-
-    const
-      { Registers which must be saved when calling a routine declared as
-        cppdecl, cdecl, stdcall, safecall, palmossyscall. The registers
-        saved should be the ones as defined in the target ABI and / or GCC.
-
-        This value can be deduced from the CALLED_USED_REGISTERS array in the
-        GCC source.
-      }
-      saved_standard_registers : array[0..9] of tsuperregister =
-        (RS_X19,RS_X20,RS_X21,RS_X22,RS_X23,RS_X24,RS_X25,RS_X26,RS_X27,RS_X28);
-      saved_mm_registers : array[0..7] of tsuperregister = (RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15);
-
-      { this is only for the generic code which is not used for this architecture }
-      saved_address_registers : array[0..0] of tsuperregister = (RS_INVALID);
-
 {*****************************************************************************
 {*****************************************************************************
                                   Helpers
                                   Helpers
 *****************************************************************************}
 *****************************************************************************}
@@ -341,11 +321,17 @@ unit cpubase;
     function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function conditions_equal(const c1, c2: TAsmCond): boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function conditions_equal(const c1, c2: TAsmCond): boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
 
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal") }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+
     procedure shifterop_reset(var so : tshifterop); {$ifdef USEINLINE}inline;{$endif USEINLINE}
     procedure shifterop_reset(var so : tshifterop); {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
 
     function dwarf_reg(r:tregister):shortint;
     function dwarf_reg(r:tregister):shortint;
+    function dwarf_reg_no_error(r:tregister):shortint;
+    function eh_return_data_regno(nr: longint): longint;
 
 
     function is_shifter_const(d: aint; size: tcgsize): boolean;
     function is_shifter_const(d: aint; size: tcgsize): boolean;
+    function IsFloatImmediate(ft : tfloattype;value : bestreal) : boolean;
 
 
 
 
   implementation
   implementation
@@ -384,8 +370,11 @@ unit cpubase;
           R_MMREGISTER:
           R_MMREGISTER:
             begin
             begin
               case s of
               case s of
+                { records }
+                OS_32,
                 OS_F32:
                 OS_F32:
                   cgsize2subreg:=R_SUBMMS;
                   cgsize2subreg:=R_SUBMMS;
+                OS_64,
                 OS_F64:
                 OS_F64:
                   cgsize2subreg:=R_SUBMMD;
                   cgsize2subreg:=R_SUBMMD;
                 else
                 else
@@ -502,6 +491,26 @@ unit cpubase;
       end;
       end;
 
 
 
 
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal") }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+      begin
+        Result := (c = C_None) or conditions_equal(Subset, c);
+
+        { Please update as necessary. [Kit] }
+        if not Result then
+          case Subset of
+            C_EQ:
+              Result := (c in [C_GE, C_LE]);
+            C_LT:
+              Result := (c in [C_LE]);
+            C_GT:
+              Result := (c in [C_GE]);
+            else
+              Result := False;
+          end;
+      end;
+
+
     function dwarf_reg(r:tregister):shortint;
     function dwarf_reg(r:tregister):shortint;
       begin
       begin
         result:=regdwarf_table[findreg_by_number(r)];
         result:=regdwarf_table[findreg_by_number(r)];
@@ -509,6 +518,10 @@ unit cpubase;
           internalerror(200603251);
           internalerror(200603251);
       end;
       end;
 
 
+    function dwarf_reg_no_error(r:tregister):shortint;
+      begin
+        result:=regdwarf_table[findreg_by_number(r)];
+      end;
 
 
     function is_shifter_const(d: aint; size: tcgsize): boolean;
     function is_shifter_const(d: aint; size: tcgsize): boolean;
       var
       var
@@ -618,4 +631,43 @@ unit cpubase;
           end;
           end;
       end;
       end;
 
 
+
+  function eh_return_data_regno(nr: longint): longint;
+    begin
+      if (nr>=0) and (nr<2) then
+        result:=nr
+      else
+        result:=-1;
+    end;
+
+
+  function IsFloatImmediate(ft : tfloattype;value : bestreal) : boolean;
+    var
+      singlerec : tcompsinglerec;
+      doublerec : tcompdoublerec;
+    begin
+      Result:=false;
+      case ft of
+        s32real:
+          begin
+            singlerec.value:=value;
+            singlerec:=tcompsinglerec(NtoLE(DWord(singlerec)));
+            Result:=(singlerec.bytes[0]=0) and (singlerec.bytes[1]=0) and ((singlerec.bytes[2] and 7)=0)  and
+              (((singlerec.bytes[3] and $7e)=$40) or ((singlerec.bytes[3] and $7e)=$3e));
+          end;
+        s64real:
+          begin
+            doublerec.value:=value;
+            doublerec:=tcompdoublerec(NtoLE(QWord(doublerec)));
+            Result:=(doublerec.bytes[0]=0) and (doublerec.bytes[1]=0) and (doublerec.bytes[2]=0) and
+                    (doublerec.bytes[3]=0) and (doublerec.bytes[4]=0) and (doublerec.bytes[5]=0) and
+                    ((((doublerec.bytes[6] and $c0)=$0) and ((doublerec.bytes[7] and $7f)=$40)) or
+                     (((doublerec.bytes[6] and $c0)=$c0) and ((doublerec.bytes[7] and $7f)=$3f)));
+          end;
+        else
+          ;
+      end;
+    end;
+
+
 end.
 end.

+ 8 - 4
compiler/aarch64/cpuinfo.pas

@@ -14,6 +14,8 @@
 
 
 Unit CPUInfo;
 Unit CPUInfo;
 
 
+{$i fpcdefs.inc}
+
 Interface
 Interface
 
 
   uses
   uses
@@ -56,13 +58,15 @@ Type
 
 
 
 
 Const
 Const
+   fputypestrllvm : array[tfputype] of string[6] = ('',
+     ''
+   );
+
    { Is there support for dealing with multiple microcontrollers available }
    { Is there support for dealing with multiple microcontrollers available }
    { for this platform? }
    { for this platform? }
    ControllerSupport = false; (* Not yet at least ;-) *)
    ControllerSupport = false; (* Not yet at least ;-) *)
    {# Size of native extended floating point type }
    {# Size of native extended floating point type }
    extended_size = 8;
    extended_size = 8;
-   {# Size of a multimedia register               }
-   mmreg_size = 16;
    { target cpu string (used by compiler options) }
    { target cpu string (used by compiler options) }
    target_cpu_string = 'aarch64';
    target_cpu_string = 'aarch64';
 
 
@@ -108,12 +112,12 @@ Const
                                  genericlevel3optimizerswitches-
                                  genericlevel3optimizerswitches-
                                  { no need to write info about those }
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
-                                 [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,
+                                 [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_loopunroll,cs_opt_tailrecursion,
 				  cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
 				  cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
 
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
-     [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
+     [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
 
 

+ 7 - 4
compiler/aarch64/cpunode.pas

@@ -31,11 +31,14 @@ implementation
 
 
   uses
   uses
     ncgbas,ncgflw,ncgcal,ncgcnv,ncgld,ncgmem,ncgcon,ncgset,ncgobjc,
     ncgbas,ncgflw,ncgcal,ncgcnv,ncgld,ncgmem,ncgcon,ncgset,ncgobjc,
-    ncpuadd,ncpumat,ncpumem,ncpuinl,ncpucnv,ncpuset,
-    { this not really a node }
-    rgcpu,
     { symtable }
     { symtable }
     symcpu,
     symcpu,
-    aasmdef;
+    aasmdef,
+{$ifndef llvm}
+    ncpuadd,ncpumat,ncpumem,ncpuinl,ncpucnv,ncpuset,ncpucon
+{$else llvm}
+    llvmnode
+{$endif llvm}
+    ;
 
 
 end.
 end.

+ 125 - 85
compiler/aarch64/cpupara.pas

@@ -30,17 +30,19 @@ unit cpupara;
        globtype,globals,
        globtype,globals,
        aasmtai,aasmdata,
        aasmtai,aasmdata,
        cpuinfo,cpubase,cgbase,cgutils,
        cpuinfo,cpubase,cgbase,cgutils,
-       symconst,symbase,symtype,symdef,parabase,paramgr;
+       symconst,symbase,symtype,symdef,parabase,paramgr,armpara;
 
 
     type
     type
-       tcpuparamanager = class(tparamanager)
+       tcpuparamanager = class(tarmgenparamanager)
           function get_volatile_registers_int(calloption: tproccalloption): tcpuregisterset; override;
           function get_volatile_registers_int(calloption: tproccalloption): tcpuregisterset; override;
           function get_volatile_registers_fpu(calloption: tproccalloption): tcpuregisterset; override;
           function get_volatile_registers_fpu(calloption: tproccalloption): tcpuregisterset; override;
           function get_volatile_registers_mm(calloption: tproccalloption): tcpuregisterset; override;
           function get_volatile_registers_mm(calloption: tproccalloption): tcpuregisterset; override;
+          function get_saved_registers_int(calloption: tproccalloption): tcpuregisterarray; override;
+          function get_saved_registers_mm(calloption: tproccalloption): tcpuregisterarray; override;
           function push_addr_param(varspez: tvarspez; def: tdef; calloption: tproccalloption): boolean; override;
           function push_addr_param(varspez: tvarspez; def: tdef; calloption: tproccalloption): boolean; override;
           function ret_in_param(def: tdef; pd: tabstractprocdef):boolean;override;
           function ret_in_param(def: tdef; pd: tabstractprocdef):boolean;override;
           function create_paraloc_info(p: tabstractprocdef; side: tcallercallee):longint;override;
           function create_paraloc_info(p: tabstractprocdef; side: tcallercallee):longint;override;
-          function create_varargs_paraloc_info(p: tabstractprocdef; varargspara: tvarargsparalist):longint;override;
+          function create_varargs_paraloc_info(p: tabstractprocdef; side: tcallercallee; varargspara: tvarargsparalist):longint;override;
           function get_funcretloc(p: tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
           function get_funcretloc(p: tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
           function param_use_paraloc(const cgpara: tcgpara): boolean; override;
           function param_use_paraloc(const cgpara: tcgpara): boolean; override;
          private
          private
@@ -50,6 +52,7 @@ unit cpupara;
 
 
           procedure init_para_alloc_values;
           procedure init_para_alloc_values;
           procedure alloc_para(out result: tcgpara; p: tabstractprocdef; varspez: tvarspez; side: tcallercallee; paradef: tdef; isvariadic, isdelphinestedcc: boolean);
           procedure alloc_para(out result: tcgpara; p: tabstractprocdef; varspez: tvarspez; side: tcallercallee; paradef: tdef; isvariadic, isdelphinestedcc: boolean);
+          function getparaloc(calloption: tproccalloption; p: tdef): tcgloc;
 
 
           procedure create_paraloc_info_intern(p: tabstractprocdef; side: tcallercallee; paras: tparalist; isvariadic: boolean);
           procedure create_paraloc_info_intern(p: tabstractprocdef; side: tcallercallee; paras: tparalist; isvariadic: boolean);
        end;
        end;
@@ -87,83 +90,25 @@ unit cpupara;
       end;
       end;
 
 
 
 
-    function is_hfa_internal(p: tdef; var basedef: tdef; var elecount: longint): boolean;
-      var
-        i: longint;
-        sym: tsym;
-        tmpelecount: longint;
+    function tcpuparamanager.get_saved_registers_int(calloption: tproccalloption): tcpuregisterarray;
+      const
+        saved_regs : {$ifndef VER3_0}tcpuregisterarray{$else}array[0..9] of tsuperregister{$endif} =
+          (RS_X19,RS_X20,RS_X21,RS_X22,RS_X23,RS_X24,RS_X25,RS_X26,RS_X27,RS_X28);
       begin
       begin
-        result:=false;
-        case p.typ of
-          arraydef:
-            begin
-              if is_special_array(p) then
-                exit;
-              { an array of empty records has no influence }
-              if tarraydef(p).elementdef.size=0 then
-                begin
-                  result:=true;
-                  exit
-                end;
-              tmpelecount:=0;
-              if not is_hfa_internal(tarraydef(p).elementdef,basedef,tmpelecount) then
-                exit;
-              { tmpelecount now contains the number of hfa elements in a
-                single array element (e.g. 2 if it's an array of a record
-                containing two singles) -> multiply by number of elements
-                in the array }
-              inc(elecount,tarraydef(p).elecount*tmpelecount);
-              if elecount>4 then
-                exit;
-              result:=true;
-            end;
-          floatdef:
-            begin
-              if not assigned(basedef) then
-                basedef:=p
-              else if basedef<>p then
-                exit;
-              inc(elecount);
-              result:=true;
-            end;
-          recorddef:
-            begin
-              for i:=0 to tabstractrecorddef(p).symtable.symlist.count-1 do
-                begin
-                  sym:=tsym(tabstractrecorddef(p).symtable.symlist[i]);
-                  if sym.typ<>fieldvarsym then
-                    continue;
-                  if not is_hfa_internal(tfieldvarsym(sym).vardef,basedef,elecount) then
-                    exit
-                end;
-              result:=true;
-            end;
-          else
-            exit
-        end;
+        result:=saved_regs;
       end;
       end;
 
 
 
 
-    { Returns whether a def is a "homogeneous float array" at the machine level.
-      This means that in the memory layout, the def only consists of maximally
-      4 floating point values that appear consecutively in memory }
-    function is_hfa(p: tdef; out basedef: tdef) : boolean;
-      var
-        elecount: longint;
+    function tcpuparamanager.get_saved_registers_mm(calloption: tproccalloption): tcpuregisterarray;
+      const
+        saved_mm_regs : {$ifndef VER3_0}tcpuregisterarray{$else}array[0..7] of tsuperregister{$endif} =
+          (RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15);
       begin
       begin
-        result:=false;
-        basedef:=nil;
-        elecount:=0;
-        result:=is_hfa_internal(p,basedef,elecount);
-        result:=
-          result and
-          (elecount>0) and
-          (elecount<=4) and
-          (p.size=basedef.size*elecount)
+        result:=saved_mm_regs;
       end;
       end;
 
 
 
 
-    function getparaloc(calloption: tproccalloption; p: tdef): tcgloc;
+    function tcpuparamanager.getparaloc(calloption: tproccalloption; p: tdef): tcgloc;
       var
       var
         hfabasedef: tdef;
         hfabasedef: tdef;
       begin
       begin
@@ -251,7 +196,8 @@ unit cpupara;
               then indexed beyond its bounds) }
               then indexed beyond its bounds) }
           arraydef:
           arraydef:
             result:=
             result:=
-              (calloption in cdecl_pocalls) or
+              ((calloption in cdecl_pocalls) and
+               not is_dynamic_array(def)) or
               is_open_array(def) or
               is_open_array(def) or
               is_array_of_const(def) or
               is_array_of_const(def) or
               is_array_constructor(def) or
               is_array_constructor(def) or
@@ -263,6 +209,8 @@ unit cpupara;
             result:=def.size>16;
             result:=def.size>16;
           stringdef :
           stringdef :
             result:=tstringdef(def).stringtype in [st_shortstring,st_longstring];
             result:=tstringdef(def).stringtype in [st_shortstring,st_longstring];
+          else
+            ;
         end;
         end;
       end;
       end;
 
 
@@ -344,6 +292,26 @@ unit cpupara;
          if not assigned(result.location) or
          if not assigned(result.location) or
             not(result.location^.loc in [LOC_REGISTER,LOC_MMREGISTER,LOC_VOID]) then
             not(result.location^.loc in [LOC_REGISTER,LOC_MMREGISTER,LOC_VOID]) then
            internalerror(2014113001);
            internalerror(2014113001);
+{$ifndef llvm}
+         {
+           According to ARM64 ABI: "If the size of the argument is less than 8 bytes then
+           the size of the argument is set to 8 bytes. The effect is as if the argument
+           was copied to the least significant bits of a 64-bit register and the remaining
+           bits filled with unspecified values."
+
+           Therefore at caller side force the ordinal result to be always 64-bit, so it
+           will be stripped to the required size and unneeded bits are discarded.
+
+           This is not required for iOS, where the result is zero/sign extended.
+         }
+         if (target_info.abi<>abi_aarch64_darwin) and
+            (side=callerside) and (result.location^.loc = LOC_REGISTER) and
+            (result.def.size<8) and is_ordinal(result.def) then
+           begin
+             result.location^.size:=OS_64;
+             result.location^.def:=u64inttype;
+           end;
+{$endif}
       end;
       end;
 
 
 
 
@@ -381,11 +349,16 @@ unit cpupara;
         if (p.proccalloption in cstylearrayofconst) and
         if (p.proccalloption in cstylearrayofconst) and
            is_array_of_const(paradef) then
            is_array_of_const(paradef) then
           begin
           begin
+            result.size:=OS_NO;
+            result.def:=paradef;
+            result.alignment:=std_param_align;
+            result.intsize:=0;
             paraloc:=result.add_location;
             paraloc:=result.add_location;
             { hack: the paraloc must be valid, but is not actually used }
             { hack: the paraloc must be valid, but is not actually used }
             paraloc^.loc:=LOC_REGISTER;
             paraloc^.loc:=LOC_REGISTER;
             paraloc^.register:=NR_X0;
             paraloc^.register:=NR_X0;
             paraloc^.size:=OS_ADDR;
             paraloc^.size:=OS_ADDR;
+            paraloc^.def:=paradef;
             exit;
             exit;
           end;
           end;
 
 
@@ -403,7 +376,7 @@ unit cpupara;
             else
             else
               paralen:=tcgsize2size[def_cgsize(paradef)];
               paralen:=tcgsize2size[def_cgsize(paradef)];
             loc:=getparaloc(p.proccalloption,paradef);
             loc:=getparaloc(p.proccalloption,paradef);
-            if (paradef.typ in [objectdef,arraydef,recorddef]) and
+            if (paradef.typ in [objectdef,arraydef,recorddef,setdef]) and
                not is_special_array(paradef) and
                not is_special_array(paradef) and
                (varspez in [vs_value,vs_const]) then
                (varspez in [vs_value,vs_const]) then
               paracgsize:=int_cgsize(paralen)
               paracgsize:=int_cgsize(paralen)
@@ -472,6 +445,8 @@ unit cpupara;
                    loc:=LOC_REFERENCE;
                    loc:=LOC_REFERENCE;
                  end;
                  end;
              end;
              end;
+           else
+             ;
          end;
          end;
 
 
          { allocate registers/stack locations }
          { allocate registers/stack locations }
@@ -515,6 +490,44 @@ unit cpupara;
              begin
              begin
                paraloc^.size:=locsize;
                paraloc^.size:=locsize;
                paraloc^.def:=locdef;
                paraloc^.def:=locdef;
+{$ifdef llvm}
+               if not is_ordinal(paradef) then
+                 begin
+                   case locsize of
+                     OS_8,OS_16,OS_32:
+                       begin
+                         paraloc^.size:=OS_64;
+                         paraloc^.def:=u64inttype;
+                       end;
+                     OS_S8,OS_S16,OS_S32:
+                       begin
+                         paraloc^.size:=OS_S64;
+                         paraloc^.def:=s64inttype;
+                       end;
+                     OS_F32:
+                       begin
+                         paraloc^.size:=OS_F32;
+                         paraloc^.def:=s32floattype;
+                       end;
+                     OS_F64:
+                       begin
+                         paraloc^.size:=OS_F64;
+                         paraloc^.def:=s64floattype;
+                       end;
+                     else
+                       begin
+                         if is_record(locdef) or
+                            is_set(locdef) or
+                            ((locdef.typ=arraydef) and
+                             not is_special_array(locdef)) then
+                           begin
+                             paraloc^.size:=OS_64;
+                             paraloc^.def:=u64inttype;
+                           end
+                       end;
+                   end;
+                 end;
+{$endif llvm}
              end;
              end;
 
 
            { paraloc loc }
            { paraloc loc }
@@ -532,12 +545,31 @@ unit cpupara;
                     responsibility to sign or zero-extend arguments having fewer
                     responsibility to sign or zero-extend arguments having fewer
                     than 32 bits, and that unused bits in a register are
                     than 32 bits, and that unused bits in a register are
                     unspecified. In iOS, however, the caller must perform such
                     unspecified. In iOS, however, the caller must perform such
-                    extensions, up to 32 bits." }
-                 if (target_info.abi=abi_aarch64_darwin) and
-                    (side=callerside) and
-                    is_ordinal(paradef) and
-                    (paradef.size<4) then
-                   paraloc^.size:=OS_32;
+                    extensions, up to 32 bits."
+                    Zero extend an argument at caller side for iOS and
+                    ignore the argument's unspecified high bits at callee side for
+                    all other platforms. }
+                 if (paradef.size<4) and is_ordinal(paradef) then
+                   begin
+                     if target_info.abi=abi_aarch64_darwin then
+                       begin
+                         if side=callerside then
+                           begin
+                             paraloc^.size:=OS_32;
+                             paraloc^.def:=u32inttype;
+                           end;
+                       end
+{$ifndef llvm}
+                     else
+                       begin
+                         if side=calleeside then
+                           begin
+                             paraloc^.size:=OS_32;
+                             paraloc^.def:=u32inttype;
+                           end;
+                       end;
+{$endif llvm}
+                   end;
 
 
                  { in case it's a composite, "The argument is passed as though
                  { in case it's a composite, "The argument is passed as though
                    it had been loaded into the registers from a double-word-
                    it had been loaded into the registers from a double-word-
@@ -548,7 +580,7 @@ unit cpupara;
                  if (target_info.endian=endian_big) and
                  if (target_info.endian=endian_big) and
                     not(paraloc^.size in [OS_64,OS_S64]) and
                     not(paraloc^.size in [OS_64,OS_S64]) and
                     (paradef.typ in [setdef,recorddef,arraydef,objectdef]) then
                     (paradef.typ in [setdef,recorddef,arraydef,objectdef]) then
-                   paraloc^.shiftval:=-(8-tcgsize2size[paraloc^.size]);
+                   paraloc^.shiftval:=-(8-tcgsize2size[paraloc^.size])*8;
                end;
                end;
              LOC_MMREGISTER:
              LOC_MMREGISTER:
                begin
                begin
@@ -562,7 +594,7 @@ unit cpupara;
                   paraloc^.loc:=LOC_REFERENCE;
                   paraloc^.loc:=LOC_REFERENCE;
 
 
                   { the current stack offset may not be properly aligned in
                   { the current stack offset may not be properly aligned in
-                    case we're on Darwin have allocated a non-variadic argument
+                    case we're on Darwin and have allocated a non-variadic argument
                     < 8 bytes previously }
                     < 8 bytes previously }
                   if target_info.abi=abi_aarch64_darwin then
                   if target_info.abi=abi_aarch64_darwin then
                     curstackoffset:=align(curstackoffset,paraloc^.def.alignment);
                     curstackoffset:=align(curstackoffset,paraloc^.def.alignment);
@@ -614,12 +646,12 @@ unit cpupara;
      end;
      end;
 
 
 
 
-    function tcpuparamanager.create_varargs_paraloc_info(p: tabstractprocdef; varargspara: tvarargsparalist):longint;
+    function tcpuparamanager.create_varargs_paraloc_info(p: tabstractprocdef; side: tcallercallee; varargspara: tvarargsparalist):longint;
       begin
       begin
         init_para_alloc_values;
         init_para_alloc_values;
 
 
         { non-variadic parameters }
         { non-variadic parameters }
-        create_paraloc_info_intern(p,callerside,p.paras,false);
+        create_paraloc_info_intern(p,side,p.paras,false);
         if p.proccalloption in cstylearrayofconst then
         if p.proccalloption in cstylearrayofconst then
           begin
           begin
             { on Darwin, we cannot use any registers for variadic parameters }
             { on Darwin, we cannot use any registers for variadic parameters }
@@ -629,11 +661,19 @@ unit cpupara;
                 curmmreg:=succ(RS_LAST_MM_PARAM_SUPREG);
                 curmmreg:=succ(RS_LAST_MM_PARAM_SUPREG);
               end;
               end;
             { continue loading the parameters  }
             { continue loading the parameters  }
-            create_paraloc_info_intern(p,callerside,varargspara,true);
+            if assigned(varargspara) then
+              begin
+                if side=callerside then
+                  create_paraloc_info_intern(p,side,varargspara,true)
+                else
+                  internalerror(2019021916);
+              end;
             result:=curstackoffset;
             result:=curstackoffset;
           end
           end
         else
         else
           internalerror(200410231);
           internalerror(200410231);
+
+        create_funcretloc_info(p,side);
       end;
       end;
 
 
 begin
 begin

+ 3 - 0
compiler/aarch64/cputarg.pas

@@ -41,6 +41,9 @@ implementation
     {$ifndef NOTARGETBSD}
     {$ifndef NOTARGETBSD}
       ,t_bsd
       ,t_bsd
     {$endif}
     {$endif}
+    {$ifndef NOTARGETANDROID}
+      ,t_android
+    {$endif}
 
 
 {**************************************
 {**************************************
              Assemblers
              Assemblers

+ 11 - 7
compiler/aarch64/hlcgcpu.pas

@@ -45,8 +45,6 @@ interface
       procedure a_load_regconst_subsetreg_intern(list: TAsmList; fromsize, subsetsize: tdef; fromreg: tregister; const sreg: tsubsetregister; slopt: tsubsetloadopt); override;
       procedure a_load_regconst_subsetreg_intern(list: TAsmList; fromsize, subsetsize: tdef; fromreg: tregister; const sreg: tsubsetregister; slopt: tsubsetloadopt); override;
     end;
     end;
 
 
-  procedure create_hlcodegen;
-
 implementation
 implementation
 
 
   uses
   uses
@@ -64,7 +62,10 @@ implementation
     begin
     begin
       tocgsize:=def_cgsize(tosize);
       tocgsize:=def_cgsize(tosize);
       if (sreg.startbit<>0) or
       if (sreg.startbit<>0) or
-         not(sreg.bitlen in [32,64]) then
+         not((sreg.subsetregsize in [OS_32,OS_S32]) and
+             (sreg.bitlen=32)) or
+         not((sreg.subsetregsize in [OS_64,OS_S64]) and
+             (sreg.bitlen=64)) then
         begin
         begin
           if is_signed(subsetsize) then
           if is_signed(subsetsize) then
             op:=A_SBFX
             op:=A_SBFX
@@ -160,7 +161,7 @@ implementation
       if make_global then
       if make_global then
         list.concat(Tai_symbol.Createname_global(labelname,AT_FUNCTION,0,procdef))
         list.concat(Tai_symbol.Createname_global(labelname,AT_FUNCTION,0,procdef))
       else
       else
-        list.concat(Tai_symbol.Createname(labelname,AT_FUNCTION,0,procdef));
+        list.concat(Tai_symbol.Createname_hidden(labelname,AT_FUNCTION,0,procdef));
 
 
       { set param1 interface to self  }
       { set param1 interface to self  }
       procdef.init_paraloc_info(callerside);
       procdef.init_paraloc_info(callerside);
@@ -185,11 +186,11 @@ implementation
           if (procdef.extnumber=$ffff) then
           if (procdef.extnumber=$ffff) then
             Internalerror(200006139);
             Internalerror(200006139);
           { mov  0(%rdi),%rax ; load vmt}
           { mov  0(%rdi),%rax ; load vmt}
-          reference_reset_base(href,voidpointertype,paraloc^.register,0,sizeof(pint),[]);
+          reference_reset_base(href,voidpointertype,paraloc^.register,0,ctempposinvalid,sizeof(pint),[]);
           getcpuregister(list,NR_IP0);
           getcpuregister(list,NR_IP0);
           a_load_ref_reg(list,voidpointertype,voidpointertype,href,NR_IP0);
           a_load_ref_reg(list,voidpointertype,voidpointertype,href,NR_IP0);
           { jmp *vmtoffs(%eax) ; method offs }
           { jmp *vmtoffs(%eax) ; method offs }
-          reference_reset_base(href,voidpointertype,NR_IP0,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),sizeof(pint),[]);
+          reference_reset_base(href,voidpointertype,NR_IP0,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),ctempposinvalid,sizeof(pint),[]);
           op:=A_LDR;
           op:=A_LDR;
           tcgaarch64(cg).make_simple_ref(list,op,OS_ADDR,PF_None,href,NR_IP0);
           tcgaarch64(cg).make_simple_ref(list,op,OS_ADDR,PF_None,href,NR_IP0);
           list.concat(taicpu.op_reg_ref(op,NR_IP0,href));
           list.concat(taicpu.op_reg_ref(op,NR_IP0,href));
@@ -219,11 +220,14 @@ implementation
     end;
     end;
 
 
 
 
-  procedure create_hlcodegen;
+  procedure create_hlcodegen_cpu;
     begin
     begin
       hlcg:=thlcgaarch64.create;
       hlcg:=thlcgaarch64.create;
       create_codegen;
       create_codegen;
     end;
     end;
 
 
 
 
+begin
+  chlcgobj:=thlcgaarch64;
+  create_hlcodegen:=@create_hlcodegen_cpu;
 end.
 end.

+ 9 - 0
compiler/aarch64/ncpuadd.pas

@@ -34,6 +34,7 @@ interface
           function  GetResFlags(unsigned:Boolean):TResFlags;
           function  GetResFlags(unsigned:Boolean):TResFlags;
           function  GetFPUResFlags:TResFlags;
           function  GetFPUResFlags:TResFlags;
        protected
        protected
+          function use_fma : boolean;override;
           procedure second_addfloat;override;
           procedure second_addfloat;override;
           procedure second_cmpfloat;override;
           procedure second_cmpfloat;override;
           procedure second_cmpboolean;override;
           procedure second_cmpboolean;override;
@@ -62,6 +63,12 @@ interface
                                taarch64addnode
                                taarch64addnode
 *****************************************************************************}
 *****************************************************************************}
 
 
+    function taarch64addnode.use_fma : boolean;
+      begin
+        Result:=true;
+      end;
+
+
     function taarch64addnode.GetResFlags(unsigned:Boolean):TResFlags;
     function taarch64addnode.GetResFlags(unsigned:Boolean):TResFlags;
       begin
       begin
         case NodeType of
         case NodeType of
@@ -211,6 +218,7 @@ interface
 
 
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,
            location.register,left.location.register,right.location.register));
            location.register,left.location.register,right.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
       end;
 
 
 
 
@@ -231,6 +239,7 @@ interface
         { signalling compare so we can get exceptions }
         { signalling compare so we can get exceptions }
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCMPE,
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCMPE,
              left.location.register,right.location.register));
              left.location.register,right.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
       end;
 
 
 
 

+ 0 - 1
compiler/aarch64/ncpucnv.pas

@@ -142,7 +142,6 @@ implementation
   procedure taarch64typeconvnode.second_int_to_bool;
   procedure taarch64typeconvnode.second_int_to_bool;
     var
     var
       resflags: tresflags;
       resflags: tresflags;
-      hlabel: tasmlabel;
     begin
     begin
       if (nf_explicit in flags) and
       if (nf_explicit in flags) and
          not(left.expectloc in [LOC_FLAGS,LOC_JUMP]) then
          not(left.expectloc in [LOC_FLAGS,LOC_JUMP]) then

+ 90 - 0
compiler/aarch64/ncpucon.pas

@@ -0,0 +1,90 @@
+{
+    Copyright (c) 2005 by Florian Klaempfl
+
+    Code generation for const nodes on the AArch64
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpucon;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      node,ncgcon,cpubase;
+
+    type
+      taarch64realconstnode = class(tcgrealconstnode)
+        function pass_1 : tnode;override;
+        procedure pass_generate_code;override;
+      end;
+
+  implementation
+
+    uses
+      verbose,
+      globtype,globals,
+      cpuinfo,
+      aasmbase,aasmtai,aasmdata,aasmcpu,
+      symdef,
+      defutil,
+      cgbase,cgutils,cgobj,
+      procinfo,
+      ncon;
+
+{*****************************************************************************
+                           TAARCH64REALCONSTNODE
+*****************************************************************************}
+
+    function taarch64realconstnode.pass_1 : tnode;
+      begin
+        result:=nil;
+        if IsFloatImmediate(tfloatdef(resultdef).floattype,value_real) then
+           expectloc:=LOC_MMREGISTER
+         else
+           result:=Inherited pass_1;
+      end;
+
+
+    procedure taarch64realconstnode.pass_generate_code;
+      var
+        hreg : TRegister;
+      begin
+        if IsFloatImmediate(tfloatdef(resultdef).floattype,value_real) then
+          begin
+            location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+            location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+            current_asmdata.CurrAsmList.concat(taicpu.op_reg_realconst(A_FMOV,
+              location.register,value_real));
+          end
+        { cast and compare the bit pattern as we cannot handle -0.0 }
+        else if bestrealrec(value_real).Data=0 then
+          begin
+            location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+            location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+            hreg:=newreg(R_MMREGISTER,getsupreg(location.register),R_SUBMM16B);
+            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_EOR,
+              hreg,hreg,hreg));
+          end
+        else
+          Inherited pass_generate_code;
+      end;
+
+begin
+  crealconstnode:=taarch64realconstnode;
+end.

+ 94 - 1
compiler/aarch64/ncpuinl.pas

@@ -35,6 +35,7 @@ interface
         function first_sqrt_real: tnode; override;
         function first_sqrt_real: tnode; override;
         function first_round_real: tnode; override;
         function first_round_real: tnode; override;
         function first_trunc_real: tnode; override;
         function first_trunc_real: tnode; override;
+        function first_fma : tnode; override;
         procedure second_abs_real; override;
         procedure second_abs_real; override;
         procedure second_sqr_real; override;
         procedure second_sqr_real; override;
         procedure second_sqrt_real; override;
         procedure second_sqrt_real; override;
@@ -42,6 +43,7 @@ interface
         procedure second_round_real; override;
         procedure second_round_real; override;
         procedure second_trunc_real; override;
         procedure second_trunc_real; override;
         procedure second_get_frame; override;
         procedure second_get_frame; override;
+        procedure second_fma; override;
       private
       private
         procedure load_fpu_location;
         procedure load_fpu_location;
       end;
       end;
@@ -53,6 +55,7 @@ implementation
       globtype,verbose,globals,
       globtype,verbose,globals,
       cpuinfo, defutil,symdef,aasmdata,aasmcpu,
       cpuinfo, defutil,symdef,aasmdata,aasmcpu,
       cgbase,cgutils,pass_1,pass_2,
       cgbase,cgutils,pass_1,pass_2,
+      ncal,
       cpubase,ncgutil,cgobj,cgcpu, hlcgobj;
       cpubase,ncgutil,cgobj,cgcpu, hlcgobj;
 
 
 {*****************************************************************************
 {*****************************************************************************
@@ -104,10 +107,22 @@ implementation
       end;
       end;
 
 
 
 
+     function taarch64inlinenode.first_fma : tnode;
+       begin
+         if ((is_double(resultdef)) or (is_single(resultdef))) then
+           begin
+             expectloc:=LOC_MMREGISTER;
+             Result:=nil;
+           end
+         else
+           Result:=inherited first_fma;
+       end;
+
     procedure taarch64inlinenode.second_abs_real;
     procedure taarch64inlinenode.second_abs_real;
       begin
       begin
         load_fpu_location;
         load_fpu_location;
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FABS,location.register,left.location.register));
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FABS,location.register,left.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
       end;
 
 
 
 
@@ -115,6 +130,7 @@ implementation
       begin
       begin
         load_fpu_location;
         load_fpu_location;
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_FMUL,location.register,left.location.register,left.location.register));
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_FMUL,location.register,left.location.register,left.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
       end;
 
 
 
 
@@ -122,13 +138,13 @@ implementation
       begin
       begin
         load_fpu_location;
         load_fpu_location;
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FSQRT,location.register,left.location.register));
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FSQRT,location.register,left.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
       end;
 
 
 
 
     procedure taarch64inlinenode.second_abs_long;
     procedure taarch64inlinenode.second_abs_long;
       var
       var
         opsize : tcgsize;
         opsize : tcgsize;
-        hp : taicpu;
       begin
       begin
         secondpass(left);
         secondpass(left);
         opsize:=def_cgsize(left.resultdef);
         opsize:=def_cgsize(left.resultdef);
@@ -155,6 +171,7 @@ implementation
         { convert to signed integer rounding towards zero (there's no "round to
         { convert to signed integer rounding towards zero (there's no "round to
           integer using current rounding mode") }
           integer using current rounding mode") }
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCVTZS,location.register,hreg));
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCVTZS,location.register,hreg));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
       end;
 
 
 
 
@@ -179,6 +196,82 @@ implementation
         location.register:=NR_FRAME_POINTER_REG;
         location.register:=NR_FRAME_POINTER_REG;
       end;
       end;
 
 
+
+    procedure taarch64inlinenode.second_fma;
+      const
+        op : array[false..true,false..true] of TAsmOp =
+          { positive product }
+          (
+           { positive third operand }
+           (A_FMADD,
+           { negative third operand }
+            A_FNMSUB),
+           { negative product }
+            { positive third operand }
+            (A_FMSUB,
+             A_FNMADD)
+           );
+
+      var
+        paraarray : array[1..3] of tnode;
+        i : integer;
+        negop3,
+        negproduct : boolean;
+      begin
+        negop3:=false;
+        negproduct:=false;
+        paraarray[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;
+        paraarray[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
+        paraarray[3]:=tcallparanode(parameters).paravalue;
+
+        { check if a neg. node can be removed
+          this is possible because changing the sign of
+          a floating point number does not affect its absolute
+          value in any way
+        }
+        if paraarray[1].nodetype=unaryminusn then
+          begin
+            paraarray[1]:=tunarynode(paraarray[1]).left;
+            { do not release the unused unary minus node, it is kept and released together with the other nodes,
+              only no code is generated for it }
+            negproduct:=not(negproduct);
+          end;
+
+        if paraarray[2].nodetype=unaryminusn then
+          begin
+            paraarray[2]:=tunarynode(paraarray[2]).left;
+            { do not release the unused unary minus node, it is kept and released together with the other nodes,
+              only no code is generated for it }
+            negproduct:=not(negproduct);
+          end;
+
+        if paraarray[3].nodetype=unaryminusn then
+          begin
+            paraarray[3]:=tunarynode(paraarray[3]).left;
+            { do not release the unused unary minus node, it is kept and released together with the other nodes,
+              only no code is generated for it }
+            negop3:=true;
+          end;
+
+         for i:=1 to 3 do
+          secondpass(paraarray[i]);
+
+        { no memory operand is allowed }
+        for i:=1 to 3 do
+          begin
+            if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
+          end;
+
+        location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
+        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_reg(op[negproduct,negop3],
+          location.register,paraarray[1].location.register,paraarray[2].location.register,paraarray[3].location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+      end;
+
+
 begin
 begin
   cinlinenode:=taarch64inlinenode;
   cinlinenode:=taarch64inlinenode;
 end.
 end.

+ 75 - 9
compiler/aarch64/ncpumat.pas

@@ -76,9 +76,58 @@ implementation
          resultreg  : tregister;
          resultreg  : tregister;
          hl : tasmlabel;
          hl : tasmlabel;
          overflowloc: tlocation;
          overflowloc: tlocation;
+         power: longint;
+
+       procedure genOrdConstNodeDiv;
+         var
+           helper1, helper2: TRegister;
+           so: tshifterop;
+         begin
+           if tordconstnode(right).value=0 then
+             internalerror(2020021601)
+           else if tordconstnode(right).value=1 then
+             cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, numerator, resultreg)
+           else if (tordconstnode(right).value = int64(-1)) then
+             begin
+               // note: only in the signed case possible..., may overflow
+               if cs_check_overflow in current_settings.localswitches then
+                 cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+
+               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_NEG,
+                 resultreg,numerator),toppostfix(ord(cs_check_overflow in current_settings.localswitches)*ord(PF_S))));
+             end
+           else if ispowerof2(tordconstnode(right).value,power) then
+             begin
+               if (is_signed(right.resultdef)) then
+                 begin
+                    helper2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
+                    if power = 1 then
+                      helper1:=numerator
+                    else
+                      begin
+                        helper1:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
+                        cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,63,numerator,helper1);
+                      end;
+                    shifterop_reset(so);
+                    so.shiftmode:=SM_LSR;
+                    so.shiftimm:=64-power;
+                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,helper2,numerator,helper1,so));
+                    cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,power,helper2,resultreg);
+                  end
+               else
+                 cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_INT,power,numerator,resultreg)
+             end
+           else
+             { Everything else is handled in the generic code }
+             cg.g_div_const_reg_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),
+               tordconstnode(right).value.svalue,numerator,resultreg);
+         end;
+
       begin
       begin
        secondpass(left);
        secondpass(left);
        secondpass(right);
        secondpass(right);
+       { avoid warning }
+       divider:=NR_NO;
 
 
        { set result location }
        { set result location }
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
@@ -89,16 +138,32 @@ implementation
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
        numerator:=left.location.register;
        numerator:=left.location.register;
 
 
-       { load divider in a register }
-       hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
-       divider:=right.location.register;
-
-       { start division }
-       if is_signed(left.resultdef) then
-         op:=A_SDIV
+       if (right.nodetype=ordconstn) and
+          ((tordconstnode(right).value=1) or
+           (tordconstnode(right).value=int64(-1)) or
+           (tordconstnode(right).value=0) or
+           ispowerof2(tordconstnode(right).value,power)) then
+         begin
+           genOrdConstNodeDiv;
+           if nodetype=modn then
+             begin
+               divider:=cg.getintregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
+               cg.a_load_const_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),int64(tordconstnode(right).value),divider);
+             end;
+         end
        else
        else
-         op:=A_UDIV;
-       current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,location.register,numerator,divider));
+         begin
+           { load divider in a register }
+           hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
+           divider:=right.location.register;
+
+           { start division }
+           if is_signed(left.resultdef) then
+             op:=A_SDIV
+           else
+             op:=A_UDIV;
+           current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,location.register,numerator,divider));
+         end;
 
 
        { no divide-by-zero detection available in hardware, emulate (if it's a
        { no divide-by-zero detection available in hardware, emulate (if it's a
          constant, this will have been detected earlier already) }
          constant, this will have been detected earlier already) }
@@ -187,6 +252,7 @@ implementation
         location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
         location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
         location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
         location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FNEG,location.register,left.location.register));
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FNEG,location.register,left.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
       end;
 
 
 begin
 begin

+ 1 - 1
compiler/aarch64/ncpumem.pas

@@ -113,7 +113,7 @@ implementation
           location.reference.offset:=0;
           location.reference.offset:=0;
           base:=cg.getaddressregister(current_asmdata.CurrAsmList);
           base:=cg.getaddressregister(current_asmdata.CurrAsmList);
           cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,location.reference,base);
           cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,location.reference,base);
-          reference_reset_base(location.reference,base,oldoffset,location.reference.alignment,location.reference.volatility);
+          reference_reset_base(location.reference,base,oldoffset,location.reference.temppos,location.reference.alignment,location.reference.volatility);
         end;
         end;
       shift:=BsfDWord(l);
       shift:=BsfDWord(l);
       location.reference.index:=maybe_const_reg;
       location.reference.index:=maybe_const_reg;

+ 121 - 6
compiler/aarch64/ncpuset.pas

@@ -31,9 +31,10 @@ interface
     type
     type
        taarch64casenode = class(tcgcasenode)
        taarch64casenode = class(tcgcasenode)
          protected
          protected
-           procedure optimizevalues(var max_linear_list: aint; var max_dist: aword);override;
+           procedure optimizevalues(var max_linear_list: int64; var max_dist: qword);override;
            function  has_jumptable: boolean;override;
            function  has_jumptable: boolean;override;
-           procedure genjumptable(hp: pcaselabel ;min_, max_: aint);override;
+           procedure genjumptable(hp: pcaselabel ;min_, max_: int64);override;
+           procedure genlinearlist(hp: pcaselabel);override;
        end;
        end;
 
 
 
 
@@ -56,7 +57,7 @@ implementation
 *****************************************************************************}
 *****************************************************************************}
 
 
 
 
-    procedure taarch64casenode.optimizevalues(var max_linear_list: aint; var max_dist: aword);
+    procedure taarch64casenode.optimizevalues(var max_linear_list: int64; var max_dist: qword);
       begin
       begin
         max_linear_list:=10;
         max_linear_list:=10;
       end;
       end;
@@ -68,7 +69,121 @@ implementation
       end;
       end;
 
 
 
 
-    procedure taarch64casenode.genjumptable(hp: pcaselabel; min_, max_: aint);
+    procedure taarch64casenode.genlinearlist(hp : pcaselabel);
+      var
+        first : boolean;
+        lastrange : boolean;
+        last : TConstExprInt;
+        cond_lt,cond_le : tresflags;
+        opcgsize, unsigned_opcgsize: tcgsize;
+
+        procedure genitem(t : pcaselabel);
+          var
+           ovloc: tlocation;
+          begin
+            if assigned(t^.less) then
+              genitem(t^.less);
+            { do we need to test the first value? }
+            if first and (t^._low>get_min_value(left.resultdef)) then
+              begin
+                cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opcgsize,jmp_lt,aint(t^._low.svalue),hregister,elselabel);
+              end;
+            if t^._low=t^._high then
+              begin
+                 if t^._low-last=0 then
+                   cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList, opcgsize, OC_EQ,0,hregister,blocklabel(t^.blockid))
+                 else
+                   begin
+                     { use unsigned_opcgsize here to avoid unnecessary sign extensions, at this place hregister will never be negative, because
+                       then genlinearlist wouldn't be used }
+                     cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList, OP_SUB, unsigned_opcgsize, aint(t^._low.svalue-last.svalue), hregister, hregister,
+                       true,ovloc);
+                     cg.a_jmp_flags(current_asmdata.CurrAsmList,F_EQ,blocklabel(t^.blockid));
+                   end;
+                 last:=t^._low;
+                 lastrange:=false;
+              end
+            else
+              begin
+                 { it begins with the smallest label, if the value }
+                 { is even smaller then jump immediately to the    }
+                 { ELSE-label                                }
+                 if first then
+                   begin
+                      { do we have to adjust the first value? }
+                      if (t^._low>get_min_value(left.resultdef)) or (get_min_value(left.resultdef)<>0) then
+                        begin
+                          { use unsigned_opcgsize here to avoid unnecessary sign extensions, at this place hregister will never be negative, because
+                            then genlinearlist wouldn't be used }
+                          cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList, OP_SUB, unsigned_opcgsize, aint(t^._low.svalue), hregister, hregister,
+                            true,ovloc);
+                        end;
+                   end
+                 else
+                   begin
+                     { if there is no unused label between the last and the }
+                     { present label then the lower limit can be checked    }
+                     { immediately. else check the range in between:       }
+
+                     { use unsigned_opcgsize here to avoid unnecessary sign extensions, at this place hregister will never be negative, because
+                       then genlinearlist wouldn't be used }
+                     cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList, OP_SUB, unsigned_opcgsize, aint(t^._low.svalue - last.svalue), hregister, hregister,
+                       true,ovloc);
+                     { no jump necessary here if the new range starts at }
+                     { the value following the previous one              }
+                     if (aint(t^._low.svalue - last.svalue) <> 1) or
+                        (not lastrange) then
+                       cg.a_jmp_flags(current_asmdata.CurrAsmList,cond_lt,elselabel);
+                   end;
+                 { use unsigned_opcgsize here to avoid unnecessary sign extensions, at this place hregister will never be negative, because
+                   then genlinearlist wouldn't be used }
+                 cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,unsigned_opcgsize,aint(t^._high.svalue - t^._low.svalue), hregister, hregister,
+                   true,ovloc);
+                 cg.a_jmp_flags(current_asmdata.CurrAsmList,cond_le,blocklabel(t^.blockid));
+
+                 last:=t^._high;
+                 lastrange:=true;
+              end;
+            first:=false;
+            if assigned(t^.greater) then
+              genitem(t^.greater);
+          end;
+
+        begin
+           opcgsize:=def_cgsize(opsize);
+           case opcgsize of
+             OS_8,OS_16,OS_32,OS_S8,OS_S16,OS_S32:
+               unsigned_opcgsize:=OS_32;
+             OS_64,OS_S64:
+               unsigned_opcgsize:=OS_64;
+             else
+               Internalerror(2019090902);
+           end;
+           if with_sign then
+             begin
+                cond_lt:=F_LT;
+                cond_le:=F_LE;
+             end
+           else
+              begin
+                cond_lt:=F_CC;
+                cond_le:=F_LS;
+             end;
+           { do we need to generate cmps? }
+           if (with_sign and (min_label<0)) then
+             genlinearcmplist(hp)
+           else
+             begin
+                last:=0;
+                lastrange:=false;
+                first:=true;
+                genitem(hp);
+                cg.a_jmp_always(current_asmdata.CurrAsmList,elselabel);
+             end;
+        end;
+
+
+    procedure taarch64casenode.genjumptable(hp: pcaselabel; min_, max_: int64);
       var
       var
         last: TConstExprInt;
         last: TConstExprInt;
         tablelabel: TAsmLabel;
         tablelabel: TAsmLabel;
@@ -80,7 +195,7 @@ implementation
 
 
       procedure genitem(list:TAsmList;t : pcaselabel);
       procedure genitem(list:TAsmList;t : pcaselabel);
         var
         var
-          i : aint;
+          i : int64;
         begin
         begin
           if assigned(t^.less) then
           if assigned(t^.less) then
             genitem(list,t^.less);
             genitem(list,t^.less);
@@ -128,7 +243,7 @@ implementation
         basereg:=cg.getaddressregister(current_asmdata.CurrAsmList);
         basereg:=cg.getaddressregister(current_asmdata.CurrAsmList);
         cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,basereg);
         cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,basereg);
         { load table slot, 32-bit sign extended }
         { load table slot, 32-bit sign extended }
-        reference_reset_base(href,basereg,0,4,[]);
+        reference_reset_base(href,basereg,0,href.temppos,4,[]);
         href.index:=indexreg;
         href.index:=indexreg;
         href.shiftmode:=SM_LSL;
         href.shiftmode:=SM_LSL;
         href.shiftimm:=2;
         href.shiftimm:=2;

+ 64 - 0
compiler/aarch64/ra64con.inc

@@ -71,161 +71,225 @@ NR_H0 = tregister($04030000);
 NR_S0 = tregister($04090000);
 NR_S0 = tregister($04090000);
 NR_D0 = tregister($040a0000);
 NR_D0 = tregister($040a0000);
 NR_Q0 = tregister($04050000);
 NR_Q0 = tregister($04050000);
+NR_V08B = tregister($04170000);
+NR_V016B = tregister($04180000);
 NR_B1 = tregister($04010001);
 NR_B1 = tregister($04010001);
 NR_H1 = tregister($04030001);
 NR_H1 = tregister($04030001);
 NR_S1 = tregister($04090001);
 NR_S1 = tregister($04090001);
 NR_D1 = tregister($040a0001);
 NR_D1 = tregister($040a0001);
 NR_Q1 = tregister($04050001);
 NR_Q1 = tregister($04050001);
+NR_V18B = tregister($04170001);
+NR_V116B = tregister($04180001);
 NR_B2 = tregister($04010002);
 NR_B2 = tregister($04010002);
 NR_H2 = tregister($04030002);
 NR_H2 = tregister($04030002);
 NR_S2 = tregister($04090002);
 NR_S2 = tregister($04090002);
 NR_D2 = tregister($040a0002);
 NR_D2 = tregister($040a0002);
 NR_Q2 = tregister($04050002);
 NR_Q2 = tregister($04050002);
+NR_V28B = tregister($04170002);
+NR_V216B = tregister($04180002);
 NR_B3 = tregister($04010003);
 NR_B3 = tregister($04010003);
 NR_H3 = tregister($04030003);
 NR_H3 = tregister($04030003);
 NR_S3 = tregister($04090003);
 NR_S3 = tregister($04090003);
 NR_D3 = tregister($040a0003);
 NR_D3 = tregister($040a0003);
 NR_Q3 = tregister($04050003);
 NR_Q3 = tregister($04050003);
+NR_V38B = tregister($04170003);
+NR_V316B = tregister($04180003);
 NR_B4 = tregister($04010004);
 NR_B4 = tregister($04010004);
 NR_H4 = tregister($04030004);
 NR_H4 = tregister($04030004);
 NR_S4 = tregister($04090004);
 NR_S4 = tregister($04090004);
 NR_D4 = tregister($040a0004);
 NR_D4 = tregister($040a0004);
 NR_Q4 = tregister($04050004);
 NR_Q4 = tregister($04050004);
+NR_V48B = tregister($04170004);
+NR_V416B = tregister($04180004);
 NR_B5 = tregister($04010005);
 NR_B5 = tregister($04010005);
 NR_H5 = tregister($04030005);
 NR_H5 = tregister($04030005);
 NR_S5 = tregister($04090005);
 NR_S5 = tregister($04090005);
 NR_D5 = tregister($040a0005);
 NR_D5 = tregister($040a0005);
 NR_Q5 = tregister($04050005);
 NR_Q5 = tregister($04050005);
+NR_V58B = tregister($04170005);
+NR_V516B = tregister($04180005);
 NR_B6 = tregister($04010006);
 NR_B6 = tregister($04010006);
 NR_H6 = tregister($04030006);
 NR_H6 = tregister($04030006);
 NR_S6 = tregister($04090006);
 NR_S6 = tregister($04090006);
 NR_D6 = tregister($040a0006);
 NR_D6 = tregister($040a0006);
 NR_Q6 = tregister($04050006);
 NR_Q6 = tregister($04050006);
+NR_V68B = tregister($04170006);
+NR_V616B = tregister($04180006);
 NR_B7 = tregister($04010007);
 NR_B7 = tregister($04010007);
 NR_H7 = tregister($04030007);
 NR_H7 = tregister($04030007);
 NR_S7 = tregister($04090007);
 NR_S7 = tregister($04090007);
 NR_D7 = tregister($040a0007);
 NR_D7 = tregister($040a0007);
 NR_Q7 = tregister($04050007);
 NR_Q7 = tregister($04050007);
+NR_V78B = tregister($04170007);
+NR_V716B = tregister($04180007);
 NR_B8 = tregister($04010008);
 NR_B8 = tregister($04010008);
 NR_H8 = tregister($04030008);
 NR_H8 = tregister($04030008);
 NR_S8 = tregister($04090008);
 NR_S8 = tregister($04090008);
 NR_D8 = tregister($040a0008);
 NR_D8 = tregister($040a0008);
 NR_Q8 = tregister($04050008);
 NR_Q8 = tregister($04050008);
+NR_V88B = tregister($04170008);
+NR_V816B = tregister($04180008);
 NR_B9 = tregister($04010009);
 NR_B9 = tregister($04010009);
 NR_H9 = tregister($04030009);
 NR_H9 = tregister($04030009);
 NR_S9 = tregister($04090009);
 NR_S9 = tregister($04090009);
 NR_D9 = tregister($040a0009);
 NR_D9 = tregister($040a0009);
 NR_Q9 = tregister($04050009);
 NR_Q9 = tregister($04050009);
+NR_V98B = tregister($04170009);
+NR_V916B = tregister($04180009);
 NR_B10 = tregister($0401000A);
 NR_B10 = tregister($0401000A);
 NR_H10 = tregister($0403000A);
 NR_H10 = tregister($0403000A);
 NR_S10 = tregister($0409000A);
 NR_S10 = tregister($0409000A);
 NR_D10 = tregister($040a000A);
 NR_D10 = tregister($040a000A);
 NR_Q10 = tregister($0405000A);
 NR_Q10 = tregister($0405000A);
+NR_V108B = tregister($0417000A);
+NR_V1016B = tregister($0418000A);
 NR_B11 = tregister($0401000B);
 NR_B11 = tregister($0401000B);
 NR_H11 = tregister($0403000B);
 NR_H11 = tregister($0403000B);
 NR_S11 = tregister($0409000B);
 NR_S11 = tregister($0409000B);
 NR_D11 = tregister($040a000B);
 NR_D11 = tregister($040a000B);
 NR_Q11 = tregister($0405000B);
 NR_Q11 = tregister($0405000B);
+NR_V118B = tregister($0417000B);
+NR_V1116B = tregister($0418000B);
 NR_B12 = tregister($0401000C);
 NR_B12 = tregister($0401000C);
 NR_H12 = tregister($0403000C);
 NR_H12 = tregister($0403000C);
 NR_S12 = tregister($0409000C);
 NR_S12 = tregister($0409000C);
 NR_D12 = tregister($040a000C);
 NR_D12 = tregister($040a000C);
 NR_Q12 = tregister($0405000C);
 NR_Q12 = tregister($0405000C);
+NR_V128B = tregister($0417000C);
+NR_V1216B = tregister($0418000C);
 NR_B13 = tregister($0401000D);
 NR_B13 = tregister($0401000D);
 NR_H13 = tregister($0403000D);
 NR_H13 = tregister($0403000D);
 NR_S13 = tregister($0409000D);
 NR_S13 = tregister($0409000D);
 NR_D13 = tregister($040a000D);
 NR_D13 = tregister($040a000D);
 NR_Q13 = tregister($0405000D);
 NR_Q13 = tregister($0405000D);
+NR_V138B = tregister($0417000D);
+NR_V1316B = tregister($0418000D);
 NR_B14 = tregister($0401000E);
 NR_B14 = tregister($0401000E);
 NR_H14 = tregister($0403000E);
 NR_H14 = tregister($0403000E);
 NR_S14 = tregister($0409000E);
 NR_S14 = tregister($0409000E);
 NR_D14 = tregister($040a000E);
 NR_D14 = tregister($040a000E);
 NR_Q14 = tregister($0405000E);
 NR_Q14 = tregister($0405000E);
+NR_V148B = tregister($0417000E);
+NR_V1416B = tregister($0418000E);
 NR_B15 = tregister($0401000F);
 NR_B15 = tregister($0401000F);
 NR_H15 = tregister($0403000F);
 NR_H15 = tregister($0403000F);
 NR_S15 = tregister($0409000F);
 NR_S15 = tregister($0409000F);
 NR_D15 = tregister($040a000F);
 NR_D15 = tregister($040a000F);
 NR_Q15 = tregister($0405000F);
 NR_Q15 = tregister($0405000F);
+NR_V158B = tregister($0417000F);
+NR_V1516B = tregister($0418000F);
 NR_B16 = tregister($04010010);
 NR_B16 = tregister($04010010);
 NR_H16 = tregister($04030010);
 NR_H16 = tregister($04030010);
 NR_S16 = tregister($04090010);
 NR_S16 = tregister($04090010);
 NR_D16 = tregister($040a0010);
 NR_D16 = tregister($040a0010);
 NR_Q16 = tregister($04050010);
 NR_Q16 = tregister($04050010);
+NR_V168B = tregister($04170010);
+NR_V1616B = tregister($04180010);
 NR_B17 = tregister($04010011);
 NR_B17 = tregister($04010011);
 NR_H17 = tregister($04030011);
 NR_H17 = tregister($04030011);
 NR_S17 = tregister($04090011);
 NR_S17 = tregister($04090011);
 NR_D17 = tregister($040a0011);
 NR_D17 = tregister($040a0011);
 NR_Q17 = tregister($04050011);
 NR_Q17 = tregister($04050011);
+NR_V178B = tregister($04170011);
+NR_V1716B = tregister($04180011);
 NR_B18 = tregister($04010012);
 NR_B18 = tregister($04010012);
 NR_H18 = tregister($04030012);
 NR_H18 = tregister($04030012);
 NR_S18 = tregister($04090012);
 NR_S18 = tregister($04090012);
 NR_D18 = tregister($040a0012);
 NR_D18 = tregister($040a0012);
 NR_Q18 = tregister($04050012);
 NR_Q18 = tregister($04050012);
+NR_V188B = tregister($04170012);
+NR_V1816B = tregister($04180012);
 NR_B19 = tregister($04010013);
 NR_B19 = tregister($04010013);
 NR_H19 = tregister($04030013);
 NR_H19 = tregister($04030013);
 NR_S19 = tregister($04090013);
 NR_S19 = tregister($04090013);
 NR_D19 = tregister($040a0013);
 NR_D19 = tregister($040a0013);
 NR_Q19 = tregister($04050013);
 NR_Q19 = tregister($04050013);
+NR_V198B = tregister($04170013);
+NR_V1916B = tregister($04180013);
 NR_B20 = tregister($04010014);
 NR_B20 = tregister($04010014);
 NR_H20 = tregister($04030014);
 NR_H20 = tregister($04030014);
 NR_S20 = tregister($04090014);
 NR_S20 = tregister($04090014);
 NR_D20 = tregister($040a0014);
 NR_D20 = tregister($040a0014);
 NR_Q20 = tregister($04050014);
 NR_Q20 = tregister($04050014);
+NR_V208B = tregister($04170014);
+NR_V2016B = tregister($04180014);
 NR_B21 = tregister($04010015);
 NR_B21 = tregister($04010015);
 NR_H21 = tregister($04030015);
 NR_H21 = tregister($04030015);
 NR_S21 = tregister($04090015);
 NR_S21 = tregister($04090015);
 NR_D21 = tregister($040a0015);
 NR_D21 = tregister($040a0015);
 NR_Q21 = tregister($04050015);
 NR_Q21 = tregister($04050015);
+NR_V218B = tregister($04170015);
+NR_V2116B = tregister($04180015);
 NR_B22 = tregister($04010016);
 NR_B22 = tregister($04010016);
 NR_H22 = tregister($04030016);
 NR_H22 = tregister($04030016);
 NR_S22 = tregister($04090016);
 NR_S22 = tregister($04090016);
 NR_D22 = tregister($040a0016);
 NR_D22 = tregister($040a0016);
 NR_Q22 = tregister($04050016);
 NR_Q22 = tregister($04050016);
+NR_V228B = tregister($04170016);
+NR_V2216B = tregister($04180016);
 NR_B23 = tregister($04010017);
 NR_B23 = tregister($04010017);
 NR_H23 = tregister($04030017);
 NR_H23 = tregister($04030017);
 NR_S23 = tregister($04090017);
 NR_S23 = tregister($04090017);
 NR_D23 = tregister($040a0017);
 NR_D23 = tregister($040a0017);
 NR_Q23 = tregister($04050017);
 NR_Q23 = tregister($04050017);
+NR_V238B = tregister($04170017);
+NR_V2316B = tregister($04180017);
 NR_B24 = tregister($04010018);
 NR_B24 = tregister($04010018);
 NR_H24 = tregister($04030018);
 NR_H24 = tregister($04030018);
 NR_S24 = tregister($04090018);
 NR_S24 = tregister($04090018);
 NR_D24 = tregister($040a0018);
 NR_D24 = tregister($040a0018);
 NR_Q24 = tregister($04050018);
 NR_Q24 = tregister($04050018);
+NR_V248B = tregister($04170018);
+NR_V2416B = tregister($04180018);
 NR_B25 = tregister($04010019);
 NR_B25 = tregister($04010019);
 NR_H25 = tregister($04030019);
 NR_H25 = tregister($04030019);
 NR_S25 = tregister($04090019);
 NR_S25 = tregister($04090019);
 NR_D25 = tregister($040a0019);
 NR_D25 = tregister($040a0019);
 NR_Q25 = tregister($04050019);
 NR_Q25 = tregister($04050019);
+NR_V258B = tregister($04170019);
+NR_V2516B = tregister($04180019);
 NR_B26 = tregister($0401001A);
 NR_B26 = tregister($0401001A);
 NR_H26 = tregister($0403001A);
 NR_H26 = tregister($0403001A);
 NR_S26 = tregister($0409001A);
 NR_S26 = tregister($0409001A);
 NR_D26 = tregister($040a001A);
 NR_D26 = tregister($040a001A);
 NR_Q26 = tregister($0405001A);
 NR_Q26 = tregister($0405001A);
+NR_V268B = tregister($0417001A);
+NR_V2616B = tregister($0418001A);
 NR_B27 = tregister($0401001B);
 NR_B27 = tregister($0401001B);
 NR_H27 = tregister($0403001B);
 NR_H27 = tregister($0403001B);
 NR_S27 = tregister($0409001B);
 NR_S27 = tregister($0409001B);
 NR_D27 = tregister($040a001B);
 NR_D27 = tregister($040a001B);
 NR_Q27 = tregister($0405001B);
 NR_Q27 = tregister($0405001B);
+NR_V278B = tregister($0417001B);
+NR_V2716B = tregister($0418001B);
 NR_B28 = tregister($0401001C);
 NR_B28 = tregister($0401001C);
 NR_H28 = tregister($0403001C);
 NR_H28 = tregister($0403001C);
 NR_S28 = tregister($0409001C);
 NR_S28 = tregister($0409001C);
 NR_D28 = tregister($040a001C);
 NR_D28 = tregister($040a001C);
 NR_Q28 = tregister($0405001C);
 NR_Q28 = tregister($0405001C);
+NR_V288B = tregister($0417001C);
+NR_V2816B = tregister($0418001C);
 NR_B29 = tregister($0401001D);
 NR_B29 = tregister($0401001D);
 NR_H29 = tregister($0403001D);
 NR_H29 = tregister($0403001D);
 NR_S29 = tregister($0409001D);
 NR_S29 = tregister($0409001D);
 NR_D29 = tregister($040a001D);
 NR_D29 = tregister($040a001D);
 NR_Q29 = tregister($0405001D);
 NR_Q29 = tregister($0405001D);
+NR_V298B = tregister($0417001D);
+NR_V2916B = tregister($0418001D);
 NR_B30 = tregister($0401001E);
 NR_B30 = tregister($0401001E);
 NR_H30 = tregister($0403001E);
 NR_H30 = tregister($0403001E);
 NR_S30 = tregister($0409001E);
 NR_S30 = tregister($0409001E);
 NR_D30 = tregister($040a001E);
 NR_D30 = tregister($040a001E);
 NR_Q30 = tregister($0405001E);
 NR_Q30 = tregister($0405001E);
+NR_V308B = tregister($0417001E);
+NR_V3016B = tregister($0418001E);
 NR_B31 = tregister($0401001F);
 NR_B31 = tregister($0401001F);
 NR_H31 = tregister($0403001F);
 NR_H31 = tregister($0403001F);
 NR_S31 = tregister($0409001F);
 NR_S31 = tregister($0409001F);
 NR_D31 = tregister($040a001F);
 NR_D31 = tregister($040a001F);
 NR_Q31 = tregister($0405001F);
 NR_Q31 = tregister($0405001F);
+NR_V318B = tregister($0417001F);
+NR_V3116B = tregister($0418001F);
 NR_NZCV = tregister($05000000);
 NR_NZCV = tregister($05000000);
 NR_FPCR = tregister($05000001);
 NR_FPCR = tregister($05000001);
 NR_FPSR = tregister($05000002);
 NR_FPSR = tregister($05000002);

+ 64 - 0
compiler/aarch64/ra64dwa.inc

@@ -71,6 +71,10 @@
 64,
 64,
 64,
 64,
 64,
 64,
+64,
+64,
+65,
+65,
 65,
 65,
 65,
 65,
 65,
 65,
@@ -81,6 +85,10 @@
 66,
 66,
 66,
 66,
 66,
 66,
+66,
+66,
+67,
+67,
 67,
 67,
 67,
 67,
 67,
 67,
@@ -91,6 +99,10 @@
 68,
 68,
 68,
 68,
 68,
 68,
+68,
+68,
+69,
+69,
 69,
 69,
 69,
 69,
 69,
 69,
@@ -98,129 +110,181 @@
 69,
 69,
 70,
 70,
 70,
 70,
+70                                                             ,
 70,
 70,
 70,
 70,
 70,
 70,
+70,
+71,
 71,
 71,
 71,
 71,
 71,
 71,
 71,
 71,
 71,
 71,
+71,
+72,
 72,
 72,
 72,
 72,
 72,
 72,
 72,
 72,
 72,
 72,
+72,
+73,
 73,
 73,
 73,
 73,
 73,
 73,
 73,
 73,
 73,
 73,
+73,
+74,
 74,
 74,
 74,
 74,
 74,
 74,
 74,
 74,
 74,
 74,
+74,
+75,
 75,
 75,
 75,
 75,
 75,
 75,
 75,
 75,
 75,
 75,
+75,
+76,
 76,
 76,
 76,
 76,
 76,
 76,
 76,
 76,
 76,
 76,
+76,
+77,
 77,
 77,
 77,
 77,
 77,
 77,
 77,
 77,
 77,
 77,
+77,
+78,
 78,
 78,
 78,
 78,
 78,
 78,
 78,
 78,
 78,
 78,
+78,
+79,
 79,
 79,
 79,
 79,
 79,
 79,
 79,
 79,
 79,
 79,
+79,
+80,
 80,
 80,
 80,
 80,
 80,
 80,
 80,
 80,
 80,
 80,
+80,
+81,
 81,
 81,
 81,
 81,
 81,
 81,
 81,
 81,
 81,
 81,
+81,
+82,
 82,
 82,
 82,
 82,
 82,
 82,
 82,
 82,
 82,
 82,
+82,
+83,
 83,
 83,
 83,
 83,
 83,
 83,
 83,
 83,
 83,
 83,
+83,
+84,
 84,
 84,
 84,
 84,
 84,
 84,
 84,
 84,
 84,
 84,
+84,
+85,
 85,
 85,
 85,
 85,
 85,
 85,
 85,
 85,
 85,
 85,
+85,
+86,
 86,
 86,
 86,
 86,
 86,
 86,
 86,
 86,
 86,
 86,
+86,
+87,
 87,
 87,
 87,
 87,
 87,
 87,
 87,
 87,
 87,
 87,
+87,
+88,
 88,
 88,
 88,
 88,
 88,
 88,
 88,
 88,
 88,
 88,
+88,
+89,
 89,
 89,
 89,
 89,
 89,
 89,
 89,
 89,
 89,
 89,
+89,
+90,
 90,
 90,
 90,
 90,
 90,
 90,
 90,
 90,
 90,
 90,
+90,
+91,
 91,
 91,
 91,
 91,
 91,
 91,
 91,
 91,
 91,
 91,
+91,
+92,
 92,
 92,
 92,
 92,
 92,
 92,
 92,
 92,
 92,
 92,
+92,
+93,
 93,
 93,
 93,
 93,
 93,
 93,
 93,
 93,
 93,
 93,
+93,
+94,
 94,
 94,
 94,
 94,
 94,
 94,
 94,
 94,
 94,
 94,
+94,
+95,
+95,
 95,
 95,
 95,
 95,
 95,
 95,

+ 1 - 1
compiler/aarch64/ra64nor.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from a64reg.dat }
 { don't edit, this file is generated from a64reg.dat }
-231
+295

+ 64 - 0
compiler/aarch64/ra64num.inc

@@ -71,161 +71,225 @@ tregister($04030000),
 tregister($04090000),
 tregister($04090000),
 tregister($040a0000),
 tregister($040a0000),
 tregister($04050000),
 tregister($04050000),
+tregister($04170000),
+tregister($04180000),
 tregister($04010001),
 tregister($04010001),
 tregister($04030001),
 tregister($04030001),
 tregister($04090001),
 tregister($04090001),
 tregister($040a0001),
 tregister($040a0001),
 tregister($04050001),
 tregister($04050001),
+tregister($04170001),
+tregister($04180001),
 tregister($04010002),
 tregister($04010002),
 tregister($04030002),
 tregister($04030002),
 tregister($04090002),
 tregister($04090002),
 tregister($040a0002),
 tregister($040a0002),
 tregister($04050002),
 tregister($04050002),
+tregister($04170002),
+tregister($04180002),
 tregister($04010003),
 tregister($04010003),
 tregister($04030003),
 tregister($04030003),
 tregister($04090003),
 tregister($04090003),
 tregister($040a0003),
 tregister($040a0003),
 tregister($04050003),
 tregister($04050003),
+tregister($04170003),
+tregister($04180003),
 tregister($04010004),
 tregister($04010004),
 tregister($04030004),
 tregister($04030004),
 tregister($04090004),
 tregister($04090004),
 tregister($040a0004),
 tregister($040a0004),
 tregister($04050004),
 tregister($04050004),
+tregister($04170004),
+tregister($04180004),
 tregister($04010005),
 tregister($04010005),
 tregister($04030005),
 tregister($04030005),
 tregister($04090005),
 tregister($04090005),
 tregister($040a0005),
 tregister($040a0005),
 tregister($04050005),
 tregister($04050005),
+tregister($04170005),
+tregister($04180005),
 tregister($04010006),
 tregister($04010006),
 tregister($04030006),
 tregister($04030006),
 tregister($04090006),
 tregister($04090006),
 tregister($040a0006),
 tregister($040a0006),
 tregister($04050006),
 tregister($04050006),
+tregister($04170006),
+tregister($04180006),
 tregister($04010007),
 tregister($04010007),
 tregister($04030007),
 tregister($04030007),
 tregister($04090007),
 tregister($04090007),
 tregister($040a0007),
 tregister($040a0007),
 tregister($04050007),
 tregister($04050007),
+tregister($04170007),
+tregister($04180007),
 tregister($04010008),
 tregister($04010008),
 tregister($04030008),
 tregister($04030008),
 tregister($04090008),
 tregister($04090008),
 tregister($040a0008),
 tregister($040a0008),
 tregister($04050008),
 tregister($04050008),
+tregister($04170008),
+tregister($04180008),
 tregister($04010009),
 tregister($04010009),
 tregister($04030009),
 tregister($04030009),
 tregister($04090009),
 tregister($04090009),
 tregister($040a0009),
 tregister($040a0009),
 tregister($04050009),
 tregister($04050009),
+tregister($04170009),
+tregister($04180009),
 tregister($0401000A),
 tregister($0401000A),
 tregister($0403000A),
 tregister($0403000A),
 tregister($0409000A),
 tregister($0409000A),
 tregister($040a000A),
 tregister($040a000A),
 tregister($0405000A),
 tregister($0405000A),
+tregister($0417000A),
+tregister($0418000A),
 tregister($0401000B),
 tregister($0401000B),
 tregister($0403000B),
 tregister($0403000B),
 tregister($0409000B),
 tregister($0409000B),
 tregister($040a000B),
 tregister($040a000B),
 tregister($0405000B),
 tregister($0405000B),
+tregister($0417000B),
+tregister($0418000B),
 tregister($0401000C),
 tregister($0401000C),
 tregister($0403000C),
 tregister($0403000C),
 tregister($0409000C),
 tregister($0409000C),
 tregister($040a000C),
 tregister($040a000C),
 tregister($0405000C),
 tregister($0405000C),
+tregister($0417000C),
+tregister($0418000C),
 tregister($0401000D),
 tregister($0401000D),
 tregister($0403000D),
 tregister($0403000D),
 tregister($0409000D),
 tregister($0409000D),
 tregister($040a000D),
 tregister($040a000D),
 tregister($0405000D),
 tregister($0405000D),
+tregister($0417000D),
+tregister($0418000D),
 tregister($0401000E),
 tregister($0401000E),
 tregister($0403000E),
 tregister($0403000E),
 tregister($0409000E),
 tregister($0409000E),
 tregister($040a000E),
 tregister($040a000E),
 tregister($0405000E),
 tregister($0405000E),
+tregister($0417000E),
+tregister($0418000E),
 tregister($0401000F),
 tregister($0401000F),
 tregister($0403000F),
 tregister($0403000F),
 tregister($0409000F),
 tregister($0409000F),
 tregister($040a000F),
 tregister($040a000F),
 tregister($0405000F),
 tregister($0405000F),
+tregister($0417000F),
+tregister($0418000F),
 tregister($04010010),
 tregister($04010010),
 tregister($04030010),
 tregister($04030010),
 tregister($04090010),
 tregister($04090010),
 tregister($040a0010),
 tregister($040a0010),
 tregister($04050010),
 tregister($04050010),
+tregister($04170010),
+tregister($04180010),
 tregister($04010011),
 tregister($04010011),
 tregister($04030011),
 tregister($04030011),
 tregister($04090011),
 tregister($04090011),
 tregister($040a0011),
 tregister($040a0011),
 tregister($04050011),
 tregister($04050011),
+tregister($04170011),
+tregister($04180011),
 tregister($04010012),
 tregister($04010012),
 tregister($04030012),
 tregister($04030012),
 tregister($04090012),
 tregister($04090012),
 tregister($040a0012),
 tregister($040a0012),
 tregister($04050012),
 tregister($04050012),
+tregister($04170012),
+tregister($04180012),
 tregister($04010013),
 tregister($04010013),
 tregister($04030013),
 tregister($04030013),
 tregister($04090013),
 tregister($04090013),
 tregister($040a0013),
 tregister($040a0013),
 tregister($04050013),
 tregister($04050013),
+tregister($04170013),
+tregister($04180013),
 tregister($04010014),
 tregister($04010014),
 tregister($04030014),
 tregister($04030014),
 tregister($04090014),
 tregister($04090014),
 tregister($040a0014),
 tregister($040a0014),
 tregister($04050014),
 tregister($04050014),
+tregister($04170014),
+tregister($04180014),
 tregister($04010015),
 tregister($04010015),
 tregister($04030015),
 tregister($04030015),
 tregister($04090015),
 tregister($04090015),
 tregister($040a0015),
 tregister($040a0015),
 tregister($04050015),
 tregister($04050015),
+tregister($04170015),
+tregister($04180015),
 tregister($04010016),
 tregister($04010016),
 tregister($04030016),
 tregister($04030016),
 tregister($04090016),
 tregister($04090016),
 tregister($040a0016),
 tregister($040a0016),
 tregister($04050016),
 tregister($04050016),
+tregister($04170016),
+tregister($04180016),
 tregister($04010017),
 tregister($04010017),
 tregister($04030017),
 tregister($04030017),
 tregister($04090017),
 tregister($04090017),
 tregister($040a0017),
 tregister($040a0017),
 tregister($04050017),
 tregister($04050017),
+tregister($04170017),
+tregister($04180017),
 tregister($04010018),
 tregister($04010018),
 tregister($04030018),
 tregister($04030018),
 tregister($04090018),
 tregister($04090018),
 tregister($040a0018),
 tregister($040a0018),
 tregister($04050018),
 tregister($04050018),
+tregister($04170018),
+tregister($04180018),
 tregister($04010019),
 tregister($04010019),
 tregister($04030019),
 tregister($04030019),
 tregister($04090019),
 tregister($04090019),
 tregister($040a0019),
 tregister($040a0019),
 tregister($04050019),
 tregister($04050019),
+tregister($04170019),
+tregister($04180019),
 tregister($0401001A),
 tregister($0401001A),
 tregister($0403001A),
 tregister($0403001A),
 tregister($0409001A),
 tregister($0409001A),
 tregister($040a001A),
 tregister($040a001A),
 tregister($0405001A),
 tregister($0405001A),
+tregister($0417001A),
+tregister($0418001A),
 tregister($0401001B),
 tregister($0401001B),
 tregister($0403001B),
 tregister($0403001B),
 tregister($0409001B),
 tregister($0409001B),
 tregister($040a001B),
 tregister($040a001B),
 tregister($0405001B),
 tregister($0405001B),
+tregister($0417001B),
+tregister($0418001B),
 tregister($0401001C),
 tregister($0401001C),
 tregister($0403001C),
 tregister($0403001C),
 tregister($0409001C),
 tregister($0409001C),
 tregister($040a001C),
 tregister($040a001C),
 tregister($0405001C),
 tregister($0405001C),
+tregister($0417001C),
+tregister($0418001C),
 tregister($0401001D),
 tregister($0401001D),
 tregister($0403001D),
 tregister($0403001D),
 tregister($0409001D),
 tregister($0409001D),
 tregister($040a001D),
 tregister($040a001D),
 tregister($0405001D),
 tregister($0405001D),
+tregister($0417001D),
+tregister($0418001D),
 tregister($0401001E),
 tregister($0401001E),
 tregister($0403001E),
 tregister($0403001E),
 tregister($0409001E),
 tregister($0409001E),
 tregister($040a001E),
 tregister($040a001E),
 tregister($0405001E),
 tregister($0405001E),
+tregister($0417001E),
+tregister($0418001E),
 tregister($0401001F),
 tregister($0401001F),
 tregister($0403001F),
 tregister($0403001F),
 tregister($0409001F),
 tregister($0409001F),
 tregister($040a001F),
 tregister($040a001F),
 tregister($0405001F),
 tregister($0405001F),
+tregister($0417001F),
+tregister($0418001F),
 tregister($05000000),
 tregister($05000000),
 tregister($05000001),
 tregister($05000001),
 tregister($05000002),
 tregister($05000002),

+ 201 - 137
compiler/aarch64/ra64rni.inc

@@ -67,166 +67,230 @@
 64,
 64,
 66,
 66,
 67,
 67,
-72,
-77,
-82,
-87,
-92,
-97,
+74,
+81,
+88,
+95,
 102,
 102,
-107,
-112,
-117,
-122,
-127,
-132,
+109,
+116,
+123,
+130,
 137,
 137,
-142,
-147,
-152,
-157,
-162,
-167,
+144,
+151,
+158,
+165,
 172,
 172,
-177,
-182,
-187,
-192,
-197,
-202,
+179,
+186,
+193,
+200,
 207,
 207,
-212,
-217,
-222,
+214,
+221,
+228,
+235,
+242,
+249,
+256,
+263,
+270,
+277,
+284,
 68,
 68,
-73,
-78,
-83,
-88,
-93,
-98,
+75,
+82,
+89,
+96,
 103,
 103,
-108,
-113,
-118,
-123,
-128,
-133,
+110,
+117,
+124,
+131,
 138,
 138,
-143,
-148,
-153,
-158,
-163,
-168,
+145,
+152,
+159,
+166,
 173,
 173,
-178,
-183,
-188,
-193,
-198,
-203,
+180,
+187,
+194,
+201,
 208,
 208,
-213,
-218,
-223,
+215,
+222,
+229,
+236,
+243,
+250,
+257,
+264,
+271,
+278,
+285,
 71,
 71,
-76,
-81,
-86,
-91,
-96,
-101,
+78,
+85,
+92,
+99,
 106,
 106,
-111,
-116,
-121,
-126,
-131,
-136,
+113,
+120,
+127,
+134,
 141,
 141,
-146,
-151,
-156,
-161,
-166,
-171,
+148,
+155,
+162,
+169,
 176,
 176,
-181,
-186,
-191,
-196,
-201,
-206,
+183,
+190,
+197,
+204,
 211,
 211,
-216,
-221,
-226,
+218,
+225,
+232,
+239,
+246,
+253,
+260,
+267,
+274,
+281,
+288,
 69,
 69,
-74,
-79,
-84,
-89,
-94,
-99,
+76,
+83,
+90,
+97,
 104,
 104,
-109,
-114,
-119,
-124,
-129,
-134,
+111,
+118,
+125,
+132,
 139,
 139,
-144,
-149,
-154,
-159,
-164,
-169,
+146,
+153,
+160,
+167,
 174,
 174,
-179,
-184,
-189,
-194,
-199,
-204,
+181,
+188,
+195,
+202,
 209,
 209,
-214,
-219,
-224,
+216,
+223,
+230,
+237,
+244,
+251,
+258,
+265,
+272,
+279,
+286,
 70,
 70,
-75,
-80,
-85,
-90,
-95,
-100,
+77,
+84,
+91,
+98,
 105,
 105,
-110,
-115,
-120,
-125,
-130,
-135,
+112,
+119,
+126,
+133,
 140,
 140,
-145,
-150,
-155,
-160,
-165,
-170,
+147,
+154,
+161,
+168,
 175,
 175,
-180,
-185,
-190,
-195,
-200,
-205,
+182,
+189,
+196,
+203,
 210,
 210,
-215,
+217,
+224,
+231,
+238,
+245,
+252,
+259,
+266,
+273,
+280,
+287,
+72,
+79,
+86,
+93,
+100,
+107,
+114,
+121,
+128,
+135,
+142,
+149,
+156,
+163,
+170,
+177,
+184,
+191,
+198,
+205,
+212,
+219,
+226,
+233,
+240,
+247,
+254,
+261,
+268,
+275,
+282,
+289,
+73,
+80,
+87,
+94,
+101,
+108,
+115,
+122,
+129,
+136,
+143,
+150,
+157,
+164,
+171,
+178,
+185,
+192,
+199,
+206,
+213,
 220,
 220,
-225,
 227,
 227,
-228,
-229,
-230
+234,
+241,
+248,
+255,
+262,
+269,
+276,
+283,
+290,
+291,
+292,
+293,
+294

+ 200 - 136
compiler/aarch64/ra64sri.inc

@@ -1,170 +1,234 @@
 { don't edit, this file is generated from a64reg.dat }
 { don't edit, this file is generated from a64reg.dat }
 0,
 0,
 67,
 67,
-72,
-117,
-122,
-127,
-132,
+74,
 137,
 137,
-142,
-147,
-152,
-157,
-162,
-77,
-167,
+144,
+151,
+158,
+165,
 172,
 172,
-177,
-182,
-187,
-192,
-197,
-202,
+179,
+186,
+193,
+200,
+81,
 207,
 207,
-212,
-82,
-217,
-222,
-87,
-92,
-97,
+214,
+221,
+228,
+235,
+242,
+249,
+256,
+263,
+270,
+88,
+277,
+284,
+95,
 102,
 102,
-107,
-112,
-70,
-75,
-120,
-125,
+109,
+116,
+123,
 130,
 130,
-135,
+70,
+77,
 140,
 140,
-145,
-150,
-155,
-160,
-165,
-80,
-170,
+147,
+154,
+161,
+168,
 175,
 175,
-180,
-185,
-190,
-195,
-200,
-205,
+182,
+189,
+196,
+203,
+84,
 210,
 210,
-215,
-85,
-220,
-225,
-90,
-95,
-100,
+217,
+224,
+231,
+238,
+245,
+252,
+259,
+266,
+273,
+91,
+280,
+287,
+98,
 105,
 105,
-110,
-115,
-228,
-229,
-68,
-73,
-118,
-123,
-128,
+112,
+119,
+126,
 133,
 133,
+292,
+293,
+68,
+75,
 138,
 138,
-143,
-148,
-153,
-158,
-163,
-78,
-168,
+145,
+152,
+159,
+166,
 173,
 173,
-178,
-183,
-188,
-193,
-198,
-203,
+180,
+187,
+194,
+201,
+82,
 208,
 208,
-213,
-83,
-218,
-223,
-88,
-93,
-98,
+215,
+222,
+229,
+236,
+243,
+250,
+257,
+264,
+271,
+89,
+278,
+285,
+96,
 103,
 103,
-108,
-113,
-227,
-71,
-76,
-121,
-126,
+110,
+117,
+124,
 131,
 131,
-136,
+291,
+71,
+78,
 141,
 141,
-146,
-151,
-156,
-161,
-166,
-81,
-171,
+148,
+155,
+162,
+169,
 176,
 176,
-181,
-186,
-191,
-196,
-201,
-206,
+183,
+190,
+197,
+204,
+85,
 211,
 211,
-216,
-86,
-221,
-226,
-91,
-96,
-101,
+218,
+225,
+232,
+239,
+246,
+253,
+260,
+267,
+274,
+92,
+281,
+288,
+99,
 106,
 106,
-111,
-116,
-69,
-74,
-119,
-124,
-129,
+113,
+120,
+127,
 134,
 134,
+69,
+76,
 139,
 139,
-144,
+146,
+153,
+160,
+167,
+174,
+181,
+188,
+195,
+202,
+83,
+209,
+216,
+223,
+230,
+237,
+244,
+251,
+258,
+265,
+272,
+90,
+279,
+286,
+97,
+104,
+111,
+118,
+125,
+132,
+66,
+294,
+73,
+72,
+80,
+79,
+143,
+142,
+150,
 149,
 149,
-154,
-159,
+157,
+156,
 164,
 164,
-79,
-169,
-174,
-179,
+163,
+171,
+170,
+178,
+177,
+185,
 184,
 184,
-189,
-194,
+192,
+191,
 199,
 199,
-204,
-209,
-214,
-84,
+198,
+206,
+205,
+87,
+86,
+213,
+212,
+220,
 219,
 219,
-224,
-89,
+227,
+226,
+234,
+233,
+241,
+240,
+248,
+247,
+255,
+254,
+262,
+261,
+269,
+268,
+276,
+275,
 94,
 94,
-99,
-104,
-109,
+93,
+283,
+282,
+290,
+289,
+101,
+100,
+108,
+107,
+115,
 114,
 114,
-66,
-230,
+122,
+121,
+129,
+128,
+136,
+135,
 1,
 1,
 3,
 3,
 21,
 21,

+ 64 - 0
compiler/aarch64/ra64sta.inc

@@ -71,6 +71,10 @@
 64,
 64,
 64,
 64,
 64,
 64,
+64,
+64,
+65,
+65,
 65,
 65,
 65,
 65,
 65,
 65,
@@ -81,6 +85,10 @@
 66,
 66,
 66,
 66,
 66,
 66,
+66,
+66,
+67,
+67,
 67,
 67,
 67,
 67,
 67,
 67,
@@ -91,6 +99,10 @@
 68,
 68,
 68,
 68,
 68,
 68,
+68,
+68,
+69,
+69,
 69,
 69,
 69,
 69,
 69,
 69,
@@ -101,6 +113,10 @@
 70,
 70,
 70,
 70,
 70,
 70,
+70,
+70,
+71,
+71,
 71,
 71,
 71,
 71,
 71,
 71,
@@ -111,6 +127,10 @@
 72,
 72,
 72,
 72,
 72,
 72,
+72,
+72,
+73,
+73,
 73,
 73,
 73,
 73,
 73,
 73,
@@ -121,6 +141,10 @@
 74,
 74,
 74,
 74,
 74,
 74,
+74,
+74,
+75,
+75,
 75,
 75,
 75,
 75,
 75,
 75,
@@ -131,6 +155,10 @@
 76,
 76,
 76,
 76,
 76,
 76,
+76,
+76,
+77,
+77,
 77,
 77,
 77,
 77,
 77,
 77,
@@ -141,6 +169,10 @@
 78,
 78,
 78,
 78,
 78,
 78,
+78,
+78,
+79,
+79,
 79,
 79,
 79,
 79,
 79,
 79,
@@ -151,6 +183,10 @@
 80,
 80,
 80,
 80,
 80,
 80,
+80,
+80,
+81,
+81,
 81,
 81,
 81,
 81,
 81,
 81,
@@ -161,6 +197,10 @@
 82,
 82,
 82,
 82,
 82,
 82,
+82,
+82,
+83,
+83,
 83,
 83,
 83,
 83,
 83,
 83,
@@ -171,6 +211,10 @@
 84,
 84,
 84,
 84,
 84,
 84,
+84,
+84,
+85,
+85,
 85,
 85,
 85,
 85,
 85,
 85,
@@ -181,6 +225,10 @@
 86,
 86,
 86,
 86,
 86,
 86,
+86,
+86,
+87,
+87,
 87,
 87,
 87,
 87,
 87,
 87,
@@ -191,6 +239,10 @@
 88,
 88,
 88,
 88,
 88,
 88,
+88,
+88,
+89,
+89,
 89,
 89,
 89,
 89,
 89,
 89,
@@ -201,6 +253,10 @@
 90,
 90,
 90,
 90,
 90,
 90,
+90,
+90,
+91,
+91,
 91,
 91,
 91,
 91,
 91,
 91,
@@ -211,6 +267,10 @@
 92,
 92,
 92,
 92,
 92,
 92,
+92,
+92,
+93,
+93,
 93,
 93,
 93,
 93,
 93,
 93,
@@ -221,6 +281,10 @@
 94,
 94,
 94,
 94,
 94,
 94,
+94,
+94,
+95,
+95,
 95,
 95,
 95,
 95,
 95,
 95,

+ 64 - 0
compiler/aarch64/ra64std.inc

@@ -71,161 +71,225 @@
 's0',
 's0',
 'd0',
 'd0',
 'q0',
 'q0',
+'v0.8b',
+'v0.16b',
 'b1',
 'b1',
 'h1',
 'h1',
 's1',
 's1',
 'd1',
 'd1',
 'q1',
 'q1',
+'v1.8b',
+'v1.16b',
 'b2',
 'b2',
 'h2',
 'h2',
 's2',
 's2',
 'd2',
 'd2',
 'q2',
 'q2',
+'v2.8b',
+'v2.16b',
 'b3',
 'b3',
 'h3',
 'h3',
 's3',
 's3',
 'd3',
 'd3',
 'q3',
 'q3',
+'v3.8b',
+'v3.16b',
 'b4',
 'b4',
 'h4',
 'h4',
 's4',
 's4',
 'd4',
 'd4',
 'q4',
 'q4',
+'v4.8b',
+'v4.16b',
 'b5',
 'b5',
 'h5',
 'h5',
 's5',
 's5',
 'd5',
 'd5',
 'q5',
 'q5',
+'v5.8b',
+'v5.16b',
 'b6',
 'b6',
 'h6',
 'h6',
 's6',
 's6',
 'd6',
 'd6',
 'q6',
 'q6',
+'v6.8b',
+'v6.16b',
 'b7',
 'b7',
 'h7',
 'h7',
 's7',
 's7',
 'd7',
 'd7',
 'q7',
 'q7',
+'v7.8b',
+'v7.16b',
 'b8',
 'b8',
 'h8',
 'h8',
 's8',
 's8',
 'd8',
 'd8',
 'q8',
 'q8',
+'v8.8b',
+'v8.16b',
 'b9',
 'b9',
 'h9',
 'h9',
 's9',
 's9',
 'd9',
 'd9',
 'q9',
 'q9',
+'v9.8b',
+'v9.16b',
 'b10',
 'b10',
 'h10',
 'h10',
 's10',
 's10',
 'd10',
 'd10',
 'q10',
 'q10',
+'v10.8b',
+'v10.16b',
 'b11',
 'b11',
 'h11',
 'h11',
 's11',
 's11',
 'd11',
 'd11',
 'q11',
 'q11',
+'v11.8b',
+'v11.16b',
 'b12',
 'b12',
 'h12',
 'h12',
 's12',
 's12',
 'd12',
 'd12',
 'q12',
 'q12',
+'v12.8b',
+'v12.16b',
 'b13',
 'b13',
 'h13',
 'h13',
 's13',
 's13',
 'd13',
 'd13',
 'q13',
 'q13',
+'v13.8b',
+'v13.16b',
 'b14',
 'b14',
 'h14',
 'h14',
 's14',
 's14',
 'd14',
 'd14',
 'q14',
 'q14',
+'v14.8b',
+'v14.16b',
 'b15',
 'b15',
 'h15',
 'h15',
 's15',
 's15',
 'd15',
 'd15',
 'q15',
 'q15',
+'v15.8b',
+'v15.16b',
 'b16',
 'b16',
 'h16',
 'h16',
 's16',
 's16',
 'd16',
 'd16',
 'q16',
 'q16',
+'v16.8b',
+'v16.16b',
 'b17',
 'b17',
 'h17',
 'h17',
 's17',
 's17',
 'd17',
 'd17',
 'q17',
 'q17',
+'v17.8b',
+'v17.16b',
 'b18',
 'b18',
 'h18',
 'h18',
 's18',
 's18',
 'd18',
 'd18',
 'q18',
 'q18',
+'v18.8b',
+'v18.16b',
 'b19',
 'b19',
 'h19',
 'h19',
 's19',
 's19',
 'd19',
 'd19',
 'q19',
 'q19',
+'v19.8b',
+'v19.16b',
 'b20',
 'b20',
 'h20',
 'h20',
 's20',
 's20',
 'd20',
 'd20',
 'q20',
 'q20',
+'v20.8b',
+'v20.16b',
 'b21',
 'b21',
 'h21',
 'h21',
 's21',
 's21',
 'd21',
 'd21',
 'q21',
 'q21',
+'v21.8b',
+'v21.16b',
 'b22',
 'b22',
 'h22',
 'h22',
 's22',
 's22',
 'd22',
 'd22',
 'q22',
 'q22',
+'v22.8b',
+'v22.16b',
 'b23',
 'b23',
 'h23',
 'h23',
 's23',
 's23',
 'd23',
 'd23',
 'q23',
 'q23',
+'v23.8b',
+'v23.16b',
 'b24',
 'b24',
 'h24',
 'h24',
 's24',
 's24',
 'd24',
 'd24',
 'q24',
 'q24',
+'v24.8b',
+'v24.16b',
 'b25',
 'b25',
 'h25',
 'h25',
 's25',
 's25',
 'd25',
 'd25',
 'q25',
 'q25',
+'v25.8b',
+'v25.16b',
 'b26',
 'b26',
 'h26',
 'h26',
 's26',
 's26',
 'd26',
 'd26',
 'q26',
 'q26',
+'v26.8b',
+'v26.16b',
 'b27',
 'b27',
 'h27',
 'h27',
 's27',
 's27',
 'd27',
 'd27',
 'q27',
 'q27',
+'v27.8b',
+'v27.16b',
 'b28',
 'b28',
 'h28',
 'h28',
 's28',
 's28',
 'd28',
 'd28',
 'q28',
 'q28',
+'v28.8b',
+'v28.16b',
 'b29',
 'b29',
 'h29',
 'h29',
 's29',
 's29',
 'd29',
 'd29',
 'q29',
 'q29',
+'v29.8b',
+'v29.16b',
 'b30',
 'b30',
 'h30',
 'h30',
 's30',
 's30',
 'd30',
 'd30',
 'q30',
 'q30',
+'v30.8b',
+'v30.16b',
 'b31',
 'b31',
 'h31',
 'h31',
 's31',
 's31',
 'd31',
 'd31',
 'q31',
 'q31',
+'v31.8b',
+'v31.16b',
 'nzcv',
 'nzcv',
 'fpcr',
 'fpcr',
 'fpsr',
 'fpsr',

+ 64 - 0
compiler/aarch64/ra64sup.inc

@@ -71,161 +71,225 @@ RS_H0 = $00;
 RS_S0 = $00;
 RS_S0 = $00;
 RS_D0 = $00;
 RS_D0 = $00;
 RS_Q0 = $00;
 RS_Q0 = $00;
+RS_V08B = $00;
+RS_V016B = $00;
 RS_B1 = $01;
 RS_B1 = $01;
 RS_H1 = $01;
 RS_H1 = $01;
 RS_S1 = $01;
 RS_S1 = $01;
 RS_D1 = $01;
 RS_D1 = $01;
 RS_Q1 = $01;
 RS_Q1 = $01;
+RS_V18B = $01;
+RS_V116B = $01;
 RS_B2 = $02;
 RS_B2 = $02;
 RS_H2 = $02;
 RS_H2 = $02;
 RS_S2 = $02;
 RS_S2 = $02;
 RS_D2 = $02;
 RS_D2 = $02;
 RS_Q2 = $02;
 RS_Q2 = $02;
+RS_V28B = $02;
+RS_V216B = $02;
 RS_B3 = $03;
 RS_B3 = $03;
 RS_H3 = $03;
 RS_H3 = $03;
 RS_S3 = $03;
 RS_S3 = $03;
 RS_D3 = $03;
 RS_D3 = $03;
 RS_Q3 = $03;
 RS_Q3 = $03;
+RS_V38B = $03;
+RS_V316B = $03;
 RS_B4 = $04;
 RS_B4 = $04;
 RS_H4 = $04;
 RS_H4 = $04;
 RS_S4 = $04;
 RS_S4 = $04;
 RS_D4 = $04;
 RS_D4 = $04;
 RS_Q4 = $04;
 RS_Q4 = $04;
+RS_V48B = $04;
+RS_V416B = $04;
 RS_B5 = $05;
 RS_B5 = $05;
 RS_H5 = $05;
 RS_H5 = $05;
 RS_S5 = $05;
 RS_S5 = $05;
 RS_D5 = $05;
 RS_D5 = $05;
 RS_Q5 = $05;
 RS_Q5 = $05;
+RS_V58B = $05;
+RS_V516B = $05;
 RS_B6 = $06;
 RS_B6 = $06;
 RS_H6 = $06;
 RS_H6 = $06;
 RS_S6 = $06;
 RS_S6 = $06;
 RS_D6 = $06;
 RS_D6 = $06;
 RS_Q6 = $06;
 RS_Q6 = $06;
+RS_V68B = $06;
+RS_V616B = $06;
 RS_B7 = $07;
 RS_B7 = $07;
 RS_H7 = $07;
 RS_H7 = $07;
 RS_S7 = $07;
 RS_S7 = $07;
 RS_D7 = $07;
 RS_D7 = $07;
 RS_Q7 = $07;
 RS_Q7 = $07;
+RS_V78B = $07;
+RS_V716B = $07;
 RS_B8 = $08;
 RS_B8 = $08;
 RS_H8 = $08;
 RS_H8 = $08;
 RS_S8 = $08;
 RS_S8 = $08;
 RS_D8 = $08;
 RS_D8 = $08;
 RS_Q8 = $08;
 RS_Q8 = $08;
+RS_V88B = $08;
+RS_V816B = $08;
 RS_B9 = $09;
 RS_B9 = $09;
 RS_H9 = $09;
 RS_H9 = $09;
 RS_S9 = $09;
 RS_S9 = $09;
 RS_D9 = $09;
 RS_D9 = $09;
 RS_Q9 = $09;
 RS_Q9 = $09;
+RS_V98B = $09;
+RS_V916B = $09;
 RS_B10 = $0A;
 RS_B10 = $0A;
 RS_H10 = $0A;
 RS_H10 = $0A;
 RS_S10 = $0A;
 RS_S10 = $0A;
 RS_D10 = $0A;
 RS_D10 = $0A;
 RS_Q10 = $0A;
 RS_Q10 = $0A;
+RS_V108B = $0A;
+RS_V1016B = $0A;
 RS_B11 = $0B;
 RS_B11 = $0B;
 RS_H11 = $0B;
 RS_H11 = $0B;
 RS_S11 = $0B;
 RS_S11 = $0B;
 RS_D11 = $0B;
 RS_D11 = $0B;
 RS_Q11 = $0B;
 RS_Q11 = $0B;
+RS_V118B = $0B;
+RS_V1116B = $0B;
 RS_B12 = $0C;
 RS_B12 = $0C;
 RS_H12 = $0C;
 RS_H12 = $0C;
 RS_S12 = $0C;
 RS_S12 = $0C;
 RS_D12 = $0C;
 RS_D12 = $0C;
 RS_Q12 = $0C;
 RS_Q12 = $0C;
+RS_V128B = $0C;
+RS_V1216B = $0C;
 RS_B13 = $0D;
 RS_B13 = $0D;
 RS_H13 = $0D;
 RS_H13 = $0D;
 RS_S13 = $0D;
 RS_S13 = $0D;
 RS_D13 = $0D;
 RS_D13 = $0D;
 RS_Q13 = $0D;
 RS_Q13 = $0D;
+RS_V138B = $0D;
+RS_V1316B = $0D;
 RS_B14 = $0E;
 RS_B14 = $0E;
 RS_H14 = $0E;
 RS_H14 = $0E;
 RS_S14 = $0E;
 RS_S14 = $0E;
 RS_D14 = $0E;
 RS_D14 = $0E;
 RS_Q14 = $0E;
 RS_Q14 = $0E;
+RS_V148B = $0E;
+RS_V1416B = $0E;
 RS_B15 = $0F;
 RS_B15 = $0F;
 RS_H15 = $0F;
 RS_H15 = $0F;
 RS_S15 = $0F;
 RS_S15 = $0F;
 RS_D15 = $0F;
 RS_D15 = $0F;
 RS_Q15 = $0F;
 RS_Q15 = $0F;
+RS_V158B = $0F;
+RS_V1516B = $0F;
 RS_B16 = $10;
 RS_B16 = $10;
 RS_H16 = $10;
 RS_H16 = $10;
 RS_S16 = $10;
 RS_S16 = $10;
 RS_D16 = $10;
 RS_D16 = $10;
 RS_Q16 = $10;
 RS_Q16 = $10;
+RS_V168B = $10;
+RS_V1616B = $10;
 RS_B17 = $11;
 RS_B17 = $11;
 RS_H17 = $11;
 RS_H17 = $11;
 RS_S17 = $11;
 RS_S17 = $11;
 RS_D17 = $11;
 RS_D17 = $11;
 RS_Q17 = $11;
 RS_Q17 = $11;
+RS_V178B = $11;
+RS_V1716B = $11;
 RS_B18 = $12;
 RS_B18 = $12;
 RS_H18 = $12;
 RS_H18 = $12;
 RS_S18 = $12;
 RS_S18 = $12;
 RS_D18 = $12;
 RS_D18 = $12;
 RS_Q18 = $12;
 RS_Q18 = $12;
+RS_V188B = $12;
+RS_V1816B = $12;
 RS_B19 = $13;
 RS_B19 = $13;
 RS_H19 = $13;
 RS_H19 = $13;
 RS_S19 = $13;
 RS_S19 = $13;
 RS_D19 = $13;
 RS_D19 = $13;
 RS_Q19 = $13;
 RS_Q19 = $13;
+RS_V198B = $13;
+RS_V1916B = $13;
 RS_B20 = $14;
 RS_B20 = $14;
 RS_H20 = $14;
 RS_H20 = $14;
 RS_S20 = $14;
 RS_S20 = $14;
 RS_D20 = $14;
 RS_D20 = $14;
 RS_Q20 = $14;
 RS_Q20 = $14;
+RS_V208B = $14;
+RS_V2016B = $14;
 RS_B21 = $15;
 RS_B21 = $15;
 RS_H21 = $15;
 RS_H21 = $15;
 RS_S21 = $15;
 RS_S21 = $15;
 RS_D21 = $15;
 RS_D21 = $15;
 RS_Q21 = $15;
 RS_Q21 = $15;
+RS_V218B = $15;
+RS_V2116B = $15;
 RS_B22 = $16;
 RS_B22 = $16;
 RS_H22 = $16;
 RS_H22 = $16;
 RS_S22 = $16;
 RS_S22 = $16;
 RS_D22 = $16;
 RS_D22 = $16;
 RS_Q22 = $16;
 RS_Q22 = $16;
+RS_V228B = $16;
+RS_V2216B = $16;
 RS_B23 = $17;
 RS_B23 = $17;
 RS_H23 = $17;
 RS_H23 = $17;
 RS_S23 = $17;
 RS_S23 = $17;
 RS_D23 = $17;
 RS_D23 = $17;
 RS_Q23 = $17;
 RS_Q23 = $17;
+RS_V238B = $17;
+RS_V2316B = $17;
 RS_B24 = $18;
 RS_B24 = $18;
 RS_H24 = $18;
 RS_H24 = $18;
 RS_S24 = $18;
 RS_S24 = $18;
 RS_D24 = $18;
 RS_D24 = $18;
 RS_Q24 = $18;
 RS_Q24 = $18;
+RS_V248B = $18;
+RS_V2416B = $18;
 RS_B25 = $19;
 RS_B25 = $19;
 RS_H25 = $19;
 RS_H25 = $19;
 RS_S25 = $19;
 RS_S25 = $19;
 RS_D25 = $19;
 RS_D25 = $19;
 RS_Q25 = $19;
 RS_Q25 = $19;
+RS_V258B = $19;
+RS_V2516B = $19;
 RS_B26 = $1A;
 RS_B26 = $1A;
 RS_H26 = $1A;
 RS_H26 = $1A;
 RS_S26 = $1A;
 RS_S26 = $1A;
 RS_D26 = $1A;
 RS_D26 = $1A;
 RS_Q26 = $1A;
 RS_Q26 = $1A;
+RS_V268B = $1A;
+RS_V2616B = $1A;
 RS_B27 = $1B;
 RS_B27 = $1B;
 RS_H27 = $1B;
 RS_H27 = $1B;
 RS_S27 = $1B;
 RS_S27 = $1B;
 RS_D27 = $1B;
 RS_D27 = $1B;
 RS_Q27 = $1B;
 RS_Q27 = $1B;
+RS_V278B = $1B;
+RS_V2716B = $1B;
 RS_B28 = $1C;
 RS_B28 = $1C;
 RS_H28 = $1C;
 RS_H28 = $1C;
 RS_S28 = $1C;
 RS_S28 = $1C;
 RS_D28 = $1C;
 RS_D28 = $1C;
 RS_Q28 = $1C;
 RS_Q28 = $1C;
+RS_V288B = $1C;
+RS_V2816B = $1C;
 RS_B29 = $1D;
 RS_B29 = $1D;
 RS_H29 = $1D;
 RS_H29 = $1D;
 RS_S29 = $1D;
 RS_S29 = $1D;
 RS_D29 = $1D;
 RS_D29 = $1D;
 RS_Q29 = $1D;
 RS_Q29 = $1D;
+RS_V298B = $1D;
+RS_V2916B = $1D;
 RS_B30 = $1E;
 RS_B30 = $1E;
 RS_H30 = $1E;
 RS_H30 = $1E;
 RS_S30 = $1E;
 RS_S30 = $1E;
 RS_D30 = $1E;
 RS_D30 = $1E;
 RS_Q30 = $1E;
 RS_Q30 = $1E;
+RS_V308B = $1E;
+RS_V3016B = $1E;
 RS_B31 = $1F;
 RS_B31 = $1F;
 RS_H31 = $1F;
 RS_H31 = $1F;
 RS_S31 = $1F;
 RS_S31 = $1F;
 RS_D31 = $1F;
 RS_D31 = $1F;
 RS_Q31 = $1F;
 RS_Q31 = $1F;
+RS_V318B = $1F;
+RS_V3116B = $1F;
 RS_NZCV = $00;
 RS_NZCV = $00;
 RS_FPCR = $01;
 RS_FPCR = $01;
 RS_FPSR = $02;
 RS_FPSR = $02;

+ 3 - 0
compiler/aarch64/racpu.pas

@@ -73,6 +73,7 @@ unit racpu;
         { a 32 bit integer register could actually be 16 or 8 bit }
         { a 32 bit integer register could actually be 16 or 8 bit }
         if result=OS_32 then
         if result=OS_32 then
           case oppostfix of
           case oppostfix of
+            PF_NONE: ;
             PF_B:
             PF_B:
               result:=OS_8;
               result:=OS_8;
             PF_SB:
             PF_SB:
@@ -81,6 +82,8 @@ unit racpu;
               result:=OS_16;
               result:=OS_16;
             PF_SH:
             PF_SH:
               result:=OS_S16;
               result:=OS_S16;
+            else
+              Message(asmr_e_invalid_opcode_and_operand)
           end;
           end;
       end;
       end;
 
 

+ 15 - 12
compiler/aarch64/racpugas.pas

@@ -55,10 +55,10 @@ Unit racpugas;
       globtype,verbose,
       globtype,verbose,
       systems,aasmbase,aasmtai,aasmdata,aasmcpu,
       systems,aasmbase,aasmtai,aasmdata,aasmcpu,
       { symtable }
       { symtable }
-      symconst,symsym,
+      symconst,symsym,symdef,
       procinfo,
       procinfo,
       rabase,rautils,
       rabase,rautils,
-      cgbase,cgutils;
+      cgbase,cgutils,paramgr;
 
 
 
 
     function taarch64attreader.is_register(const s:string):boolean;
     function taarch64attreader.is_register(const s:string):boolean;
@@ -461,7 +461,7 @@ Unit racpugas;
 
 
       const
       const
         shiftmode2str: array[SM_LSL..SM_SXTX] of string[4] =
         shiftmode2str: array[SM_LSL..SM_SXTX] of string[4] =
-          ('LSL','LSR','ASR',
+          ('LSL','LSR','ASR','ROR',
            'UXTB','UXTH','UXTW','UXTX',
            'UXTB','UXTH','UXTW','UXTX',
            'SXTB','SXTH','SXTW','SXTX');
            'SXTB','SXTH','SXTW','SXTX');
       var
       var
@@ -485,8 +485,8 @@ Unit racpugas;
                       useszr:=false;
                       useszr:=false;
                       for i:=low(instr.operands) to pred(opnr) do
                       for i:=low(instr.operands) to pred(opnr) do
                         begin
                         begin
-                          if (instr.operands[1].opr.typ=OPR_REGISTER) then
-                            case getsupreg(instr.operands[1].opr.reg) of
+                          if (instr.operands[i].opr.typ=OPR_REGISTER) then
+                            case getsupreg(instr.operands[i].opr.reg) of
                               RS_XZR:
                               RS_XZR:
                                 useszr:=true;
                                 useszr:=true;
                               RS_SP:
                               RS_SP:
@@ -494,7 +494,10 @@ Unit racpugas;
                             end;
                             end;
                         end;
                         end;
                       result:=valid_shifter_operand(instr.opcode,useszr,usessp,instr.Is64bit,sm,instr.operands[opnr].opr.shifterop.shiftimm);
                       result:=valid_shifter_operand(instr.opcode,useszr,usessp,instr.Is64bit,sm,instr.operands[opnr].opr.shifterop.shiftimm);
-                    end
+                      if result then
+                        instr.Ops:=opnr;
+                    end;
+                  break;
                 end;
                 end;
           end;
           end;
       end;
       end;
@@ -520,6 +523,8 @@ Unit racpugas;
                     end;
                     end;
                 end;
                 end;
             end;
             end;
+          else
+            ;
         end;
         end;
         result:=C_None;;
         result:=C_None;;
       end;
       end;
@@ -560,7 +565,8 @@ Unit racpugas;
                oper.opr.symbol:=hl;
                oper.opr.symbol:=hl;
              end
              end
             else if (actopcode=A_ADR) or
             else if (actopcode=A_ADR) or
-               (actopcode=A_ADRP) then
+               (actopcode=A_ADRP) or
+               (actopcode=A_LDR) then
               begin
               begin
                 oper.InitRef;
                 oper.InitRef;
                 MaybeAddGotAddrMode;
                 MaybeAddGotAddrMode;
@@ -607,10 +613,8 @@ Unit racpugas;
                   { don't allow direct access to fields of parameters, because that
                   { don't allow direct access to fields of parameters, because that
                     will generate buggy code. Allow it only for explicit typecasting }
                     will generate buggy code. Allow it only for explicit typecasting }
                   if hasdot and
                   if hasdot and
-                     (not oper.hastype) and
-                     (tabstractnormalvarsym(oper.opr.localsym).owner.symtabletype=parasymtable) and
-                     (current_procinfo.procdef.proccalloption<>pocall_register) then
-                    Message(asmr_e_cannot_access_field_directly_for_parameters);
+                     (not oper.hastype) then
+                    checklocalsubscript(oper.opr.localsym);
                   inc(oper.opr.localsymofs,l)
                   inc(oper.opr.localsymofs,l)
                 end;
                 end;
               OPR_CONSTANT :
               OPR_CONSTANT :
@@ -935,7 +939,6 @@ Unit racpugas;
         j  : longint;
         j  : longint;
         hs : string;
         hs : string;
         maxlen : longint;
         maxlen : longint;
-        icond : tasmcond;
       Begin
       Begin
         { making s a value parameter would break other assembler readers }
         { making s a value parameter would break other assembler readers }
         hs:=s;
         hs:=s;

+ 5 - 1
compiler/aarch64/rgcpu.pas

@@ -83,7 +83,7 @@ implementation
               hreg:=cg.getaddressregister(helplist);
               hreg:=cg.getaddressregister(helplist);
 
 
             cg.a_load_const_reg(helplist,OS_ADDR,spilltemp.offset,hreg);
             cg.a_load_const_reg(helplist,OS_ADDR,spilltemp.offset,hreg);
-            reference_reset_base(tmpref,spilltemp.base,0,sizeof(pint),[]);
+            reference_reset_base(tmpref,spilltemp.base,0,spilltemp.temppos,sizeof(pint),[]);
             tmpref.index:=hreg;
             tmpref.index:=hreg;
             if isload then
             if isload then
               helpins:=spilling_create_load(tmpref,tempreg)
               helpins:=spilling_create_load(tmpref,tempreg)
@@ -140,6 +140,8 @@ implementation
                { ok in immediate form }
                { ok in immediate form }
                if taicpu(p).oper[taicpu(p).ops-1]^.typ=top_const then
                if taicpu(p).oper[taicpu(p).ops-1]^.typ=top_const then
                  exit;
                  exit;
+             else
+               ;
            end;
            end;
            { add interferences for other registers }
            { add interferences for other registers }
            for i:=0 to taicpu(p).ops-1 do
            for i:=0 to taicpu(p).ops-1 do
@@ -163,6 +165,8 @@ implementation
                              add_edge(getsupreg(taicpu(p).oper[j]^.reg),getsupreg(taicpu(p).oper[i]^.ref^.base));
                              add_edge(getsupreg(taicpu(p).oper[j]^.reg),getsupreg(taicpu(p).oper[i]^.ref^.base));
                        end;
                        end;
                    end;
                    end;
+                 else
+                   ;
                end;
                end;
              end;
              end;
          end;
          end;

+ 16 - 113
compiler/aasmbase.pas

@@ -39,8 +39,10 @@ interface
     type
     type
        TAsmsymbind=(
        TAsmsymbind=(
          AB_NONE,AB_EXTERNAL,AB_COMMON,AB_LOCAL,AB_GLOBAL,AB_WEAK_EXTERNAL,
          AB_NONE,AB_EXTERNAL,AB_COMMON,AB_LOCAL,AB_GLOBAL,AB_WEAK_EXTERNAL,
-         { global in the current program/library, but not visible outside it }
-         AB_PRIVATE_EXTERN,AB_LAZY,AB_IMPORT,
+         { global in the current program/library, but not visible outside it
+           (= "hidden" in ELF) }
+         AB_PRIVATE_EXTERN,
+         AB_LAZY,AB_IMPORT,
          { a symbol that's internal to the compiler and used as a temp }
          { a symbol that's internal to the compiler and used as a temp }
          AB_TEMP,
          AB_TEMP,
          { a global symbol that points to another global symbol and is only used
          { a global symbol that points to another global symbol and is only used
@@ -74,10 +76,10 @@ interface
        { is the label only there for getting an DataOffset (e.g. for i/o
        { is the label only there for getting an DataOffset (e.g. for i/o
          checks -> alt_addr) or is it a jump target (alt_jump), for debug
          checks -> alt_addr) or is it a jump target (alt_jump), for debug
          info alt_dbgline and alt_dbgfile, etc. }
          info alt_dbgline and alt_dbgfile, etc. }
-       TAsmLabelType = (alt_jump,alt_addr,alt_data,alt_dbgline,alt_dbgfile,alt_dbgtype,alt_dbgframe);
+       TAsmLabelType = (alt_jump,alt_addr,alt_data,alt_dbgline,alt_dbgfile,alt_dbgtype,alt_dbgframe,alt_eh_begin,alt_eh_end);
 
 
     const
     const
-       asmlabeltypeprefix : array[TAsmLabeltype] of char = ('j','a','d','l','f','t','c');
+       asmlabeltypeprefix : array[TAsmLabeltype] of string[2] = ('j','a','d','l','f','t','c','eb','ee');
        asmsymbindname : array[TAsmsymbind] of string[23] = ('none', 'external','common',
        asmsymbindname : array[TAsmsymbind] of string[23] = ('none', 'external','common',
        'local','global','weak external','private external','lazy','import','internal temp',
        'local','global','weak external','private external','lazy','import','internal temp',
        'indirect','external indirect');
        'indirect','external indirect');
@@ -166,13 +168,20 @@ interface
          { stack segment for 16-bit DOS }
          { stack segment for 16-bit DOS }
          sec_stack,
          sec_stack,
          { initial heap segment for 16-bit DOS }
          { initial heap segment for 16-bit DOS }
-         sec_heap
+         sec_heap,
+         { dwarf based/gcc style exception handling }
+         sec_gcc_except_table,
+         sec_arm_attribute
        );
        );
 
 
        TObjCAsmSectionType = sec_objc_class..sec_objc_protolist;
        TObjCAsmSectionType = sec_objc_class..sec_objc_protolist;
 
 
        TAsmSectionOrder = (secorder_begin,secorder_default,secorder_end);
        TAsmSectionOrder = (secorder_begin,secorder_default,secorder_end);
 
 
+       TSectionFlag = (SF_A,SF_W,SF_X);
+       TSectionFlags = set of TSectionFlag;
+       TSectionProgbits = (SPB_None,SPB_PROGBITS,SPB_NOBITS,SPB_NOTE,SPB_ARM_ATTRIBUTES);
+
        TAsmSymbol = class(TFPHashObject)
        TAsmSymbol = class(TFPHashObject)
        private
        private
          { this need to be incremented with every symbol loading into the
          { this need to be incremented with every symbol loading into the
@@ -219,6 +228,8 @@ interface
          labelnr   : longint;
          labelnr   : longint;
          labeltype : TAsmLabelType;
          labeltype : TAsmLabelType;
          is_set    : boolean;
          is_set    : boolean;
+         is_public : boolean;
+         defined_in_asmstatement : boolean;
          constructor Createlocal(AList: TFPHashObjectList; nr: longint; ltyp: TAsmLabelType);
          constructor Createlocal(AList: TFPHashObjectList; nr: longint; ltyp: TAsmLabelType);
          constructor Createstatic(AList: TFPHashObjectList; nr: longint; ltyp: TAsmLabelType);
          constructor Createstatic(AList: TFPHashObjectList; nr: longint; ltyp: TAsmLabelType);
          constructor Createglobal(AList: TFPHashObjectList; const modulename: TSymStr; nr: longint; ltyp: TAsmLabelType);
          constructor Createglobal(AList: TFPHashObjectList; const modulename: TSymStr; nr: longint; ltyp: TAsmLabelType);
@@ -229,11 +240,6 @@ interface
     function create_smartlink_library:boolean;inline;
     function create_smartlink_library:boolean;inline;
     function create_smartlink:boolean;inline;
     function create_smartlink:boolean;inline;
 
 
-    function LengthUleb128(a: qword) : byte;
-    function LengthSleb128(a: int64) : byte;
-    function EncodeUleb128(a: qword;out buf) : byte;
-    function EncodeSleb128(a: int64;out buf) : byte;
-
     function ReplaceForbiddenAsmSymbolChars(const s: ansistring): ansistring;
     function ReplaceForbiddenAsmSymbolChars(const s: ansistring): ansistring;
 
 
     { dummy default noop callback }
     { dummy default noop callback }
@@ -282,109 +288,6 @@ implementation
       end;
       end;
 
 
 
 
-    function LengthUleb128(a: qword) : byte;
-      begin
-        result:=0;
-        repeat
-          a := a shr 7;
-          inc(result);
-          if a=0 then
-            break;
-        until false;
-      end;
-
-
-    function LengthSleb128(a: int64) : byte;
-      var
-        b, size: byte;
-        asign : int64;
-        neg, more: boolean;
-      begin
-        more := true;
-        neg := a < 0;
-        size := sizeof(a)*8;
-        result:=0;
-        repeat
-          b := a and $7f;
-          a := a shr 7;
-          if neg then
-            begin
-              { Use a variable to be sure that the correct or mask is generated }
-              asign:=1;
-              asign:=asign shl (size - 7);
-              a := a or -asign;
-            end;
-          if (((a = 0) and
-               (b and $40 = 0)) or
-              ((a = -1) and
-               (b and $40 <> 0))) then
-            more := false;
-          inc(result);
-          if not(more) then
-            break;
-        until false;
-      end;
-
-
-    function EncodeUleb128(a: qword;out buf) : byte;
-      var
-        b: byte;
-        pbuf : pbyte;
-      begin
-        result:=0;
-        pbuf:=@buf;
-        repeat
-          b := a and $7f;
-          a := a shr 7;
-          if a<>0 then
-            b := b or $80;
-          pbuf^:=b;
-          inc(pbuf);
-          inc(result);
-          if a=0 then
-            break;
-        until false;
-      end;
-
-
-    function EncodeSleb128(a: int64;out buf) : byte;
-      var
-        b, size: byte;
-        asign : int64;
-        neg, more: boolean;
-        pbuf : pbyte;
-      begin
-        more := true;
-        neg := a < 0;
-        size := sizeof(a)*8;
-        result:=0;
-        pbuf:=@buf;
-        repeat
-          b := a and $7f;
-          a := a shr 7;
-          if neg then
-            begin
-              { Use a variable to be sure that the correct or mask is generated }
-              asign:=1;
-              asign:=asign shl (size - 7);
-              a := a or -asign;
-            end;
-          if (((a = 0) and
-               (b and $40 = 0)) or
-              ((a = -1) and
-               (b and $40 <> 0))) then
-            more := false
-          else
-            b := b or $80;
-          pbuf^:=b;
-          inc(pbuf);
-          inc(result);
-          if not(more) then
-            break;
-        until false;
-      end;
-
-
     function ReplaceForbiddenAsmSymbolChars(const s: ansistring): ansistring;
     function ReplaceForbiddenAsmSymbolChars(const s: ansistring): ansistring;
       var
       var
         i : longint;
         i : longint;

+ 198 - 0
compiler/aasmcfi.pas

@@ -0,0 +1,198 @@
+{
+    Copyright (c) 2019 by Jonas Maebe, member of the
+    Free Pascal Compiler development team
+
+    Dwarf Call Frame Information directives
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit aasmcfi;
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+      globtype,
+      cgbase,
+      aasmtai;
+
+    type
+      tcfikind =  { one value per CFI directive; cfi2str below maps each value to its '.cfi_*' spelling, so the order must match }
+        (cfi_startproc,
+         cfi_endproc,
+         cfi_personality,
+         cfi_personality_id,
+         cfi_fde_data,
+         cfi_lsda_encoding,
+         cfi_inline_lsda,
+         cfi_def_cfa,
+         cfi_def_cfa_register,
+         cfi_def_cfa_offset,
+         cfi_adjust_cfa_offset,
+         cfi_offset,
+         cfi_val_offset,
+         cfi_rel_offset,
+         cfi_register,
+         cfi_restore,
+         cfi_undefined,
+         cfi_same_value,
+         cfi_remember_state,
+         cfi_restore_state,
+         cfi_return_column,
+         cfi_signal_frame,
+         cfi_window_save,
+         cfi_escape,
+         cfi_val_encoded_addr
+        );
+
+{$push}
+{$j-}
+      const
+        cfi2str: array[tcfikind] of string[length('.cfi_adjust_cfa_offset')] =  { directive text per tcfikind value; element type is sized to the longest entry (22 characters) }
+          ('.cfi_startproc',
+           '.cfi_endproc',
+           '.cfi_personality',
+           '.cfi_personality_id',
+           '.cfi_fde_data',
+           '.cfi_lsda_encoding',
+           '.cfi_inline_lsda',
+           '.cfi_def_cfa',
+           '.cfi_def_cfa_register',
+           '.cfi_def_cfa_offset',
+           '.cfi_adjust_cfa_offset',
+           '.cfi_offset',
+           '.cfi_val_offset',
+           '.cfi_rel_offset',
+           '.cfi_register',
+           '.cfi_restore',
+           '.cfi_undefined',
+           '.cfi_same_value',
+           '.cfi_remember_state',
+           '.cfi_restore_state',
+           '.cfi_return_column',
+           '.cfi_signal_frame',
+           '.cfi_window_save',
+           '.cfi_escape',
+           '.cfi_val_encoded_addr'
+          );  { entries are listed in tcfikind declaration order }
+{$pop}
+
+    type
+      tai_cfi_base = class abstract(tai)  { abstract ancestor of every tai_cfi_op_* class declared in this unit }
+        cfityp: tcfikind;  { which CFI directive this node represents }
+        constructor create(ctyp: tcfikind);  { stores ctyp in cfityp and sets typ to ait_cfi }
+      end;
+
+      tai_cfi_op_none = class(tai_cfi_base)  { adds no fields or methods; presumably for directives without operands - confirm at the call sites }
+      end;
+
+      tai_cfi_op_val = class(tai_cfi_base)  { CFI node carrying one integer operand }
+        val1: aint;  { the integer operand, set by the constructor }
+        constructor create(ctyp: tcfikind; const a: aint);  { a is stored in val1 }
+      end;
+
+      tai_cfi_op_string = class(tai_cfi_base)  { CFI node carrying one string operand }
+        s1: TSymStr;  { the string operand, set by the constructor }
+        constructor create(ctyp: tcfikind; const str1: TSymStr);  { str1 is stored in s1 }
+      end;
+
+      tai_cfi_op_val_string = class(tai_cfi_op_val)  { CFI node with an integer operand (inherited val1) followed by a string operand }
+        s: TSymStr;  { the string operand, set by the constructor }
+        constructor create(ctyp: tcfikind; const a: aint; const str: TSymStr);  { a goes to val1, str to s }
+      end;
+
+      tai_cfi_op_string_string = class(tai_cfi_op_string)  { CFI node with two string operands (inherited s1 plus s2) }
+        s2: TSymStr;  { the second string operand, set by the constructor }
+        constructor create(ctyp: tcfikind; const str1, str2: TSymStr);  { str1 goes to s1, str2 to s2 }
+      end;
+
+      tai_cfi_op_reg = class(tai_cfi_base)  { CFI node carrying one register operand }
+        reg1: tregister;  { the register operand, set by the constructor }
+        constructor create(ctyp: tcfikind; r: tregister);  { r is stored in reg1 }
+      end;
+
+      tai_cfi_op_reg_val = class(tai_cfi_op_reg)  { CFI node with a register operand (inherited reg1) and an integer operand }
+        val: aint;  { the integer operand; note it is named val here but val1 in tai_cfi_op_val }
+        constructor create(ctyp: tcfikind; r: tregister; a: aint);  { r goes to reg1, a to val }
+      end;
+
+      tai_cfi_op_reg_reg = class(tai_cfi_op_reg)  { CFI node with two register operands (inherited reg1 plus reg2) }
+        reg2: tregister;  { the second register operand, set by the constructor }
+        constructor create(ctyp: tcfikind; r1, r2: tregister);  { r1 goes to reg1, r2 to reg2 }
+      end;
+
+
+  implementation
+
+    constructor tai_cfi_base.create(ctyp: tcfikind);  { ctyp: the CFI directive this node stands for }
+      begin  { NOTE(review): no 'inherited create' call here - confirm the tai ancestor needs no initialisation of its own }
+        typ:=ait_cfi;  { tag the node as a CFI entry }
+        cfityp:=ctyp;  { remember the directive kind }
+      end;
+
+
+    constructor tai_cfi_op_val.create(ctyp: tcfikind; const a: aint);  { builds a CFI node with integer operand a }
+      begin
+        inherited create(ctyp);  { tai_cfi_base.create sets typ and cfityp }
+        val1:=a;  { store the integer operand }
+      end;
+
+
+    constructor tai_cfi_op_string.create(ctyp: tcfikind; const str1: TSymStr);  { builds a CFI node with string operand str1 }
+      begin
+        inherited create(ctyp);  { tai_cfi_base.create sets typ and cfityp }
+        s1:=str1;  { store the string operand }
+      end;
+
+
+    constructor tai_cfi_op_val_string.create(ctyp: tcfikind; const a: aint; const str: TSymStr);  { builds a CFI node with integer operand a and string operand str }
+      begin
+        inherited create(ctyp,a);  { tai_cfi_op_val.create stores a in val1 }
+        s:=str;  { store the string operand }
+      end;
+
+
+    constructor tai_cfi_op_string_string.create(ctyp: tcfikind; const str1, str2: TSymStr);  { builds a CFI node with two string operands }
+      begin
+        inherited create(ctyp,str1);  { tai_cfi_op_string.create stores str1 in s1 }
+        s2:=str2;  { store the second string operand }
+      end;
+
+
+    constructor tai_cfi_op_reg.create(ctyp: tcfikind; r: tregister);  { builds a CFI node with register operand r }
+      begin
+        inherited create(ctyp);  { tai_cfi_base.create sets typ and cfityp }
+        reg1:=r;  { store the register operand }
+      end;
+
+
+    constructor tai_cfi_op_reg_val.create(ctyp: tcfikind; r: tregister; a: aint);  { builds a CFI node with register operand r and integer operand a }
+      begin
+        inherited create(ctyp,r);  { tai_cfi_op_reg.create stores r in reg1 }
+        val:=a;  { store the integer operand }
+      end;
+
+
+    constructor tai_cfi_op_reg_reg.create(ctyp: tcfikind; r1, r2: tregister);  { builds a CFI node with two register operands }
+      begin
+        inherited create(ctyp,r1);  { tai_cfi_op_reg.create stores r1 in reg1 }
+        reg2:=r2;  { store the second register operand }
+      end;
+
+end.
+

+ 199 - 40
compiler/aasmcnst.pas

@@ -52,12 +52,13 @@ type
 
 
    { a simple data element; the value is stored as a tai }
    { a simple data element; the value is stored as a tai }
    tai_simpletypedconst = class(tai_abstracttypedconst)
    tai_simpletypedconst = class(tai_abstracttypedconst)
-   private
+    private
      procedure setval(AValue: tai);
      procedure setval(AValue: tai);
     protected
     protected
      fval: tai;
      fval: tai;
     public
     public
-     constructor create(_adetyp: ttypedconstkind; _def: tdef; _val: tai);
+     constructor create(_def: tdef; _val: tai);
+     destructor destroy; override;
      property val: tai read fval write setval;
      property val: tai read fval write setval;
    end;
    end;
 
 
@@ -69,7 +70,7 @@ type
      { iterator to walk over all individual items in the aggregate }
      { iterator to walk over all individual items in the aggregate }
      tadeenumerator = class(tobject)
      tadeenumerator = class(tobject)
       private
       private
-       fvalues: tfplist;
+       fvalues: tfpobjectlist;
        fvaluespos: longint;
        fvaluespos: longint;
        function getcurrent: tai_abstracttypedconst;
        function getcurrent: tai_abstracttypedconst;
       public
       public
@@ -80,7 +81,7 @@ type
      end;
      end;
 
 
     protected
     protected
-     fvalues: tfplist;
+     fvalues: tfpobjectlist;
      fisstring: boolean;
      fisstring: boolean;
 
 
      { converts the existing data to a single tai_string }
      { converts the existing data to a single tai_string }
@@ -89,10 +90,10 @@ type
     public
     public
      constructor create(_adetyp: ttypedconstkind; _fdef: tdef);
      constructor create(_adetyp: ttypedconstkind; _fdef: tdef);
      function getenumerator: tadeenumerator;
      function getenumerator: tadeenumerator;
-     procedure addvalue(val: tai_abstracttypedconst);
+     procedure addvalue(val: tai_abstracttypedconst); virtual;
      function valuecount: longint;
      function valuecount: longint;
      procedure insertvaluebeforepos(val: tai_abstracttypedconst; pos: longint);
      procedure insertvaluebeforepos(val: tai_abstracttypedconst; pos: longint);
-     function replacevalueatpos(val: tai_abstracttypedconst; pos: longint): tai_abstracttypedconst;
+     procedure replacevalueatpos(val: tai_abstracttypedconst; pos: longint);
      { change the type to a record, regardless of how the aggregate was created;
      { change the type to a record, regardless of how the aggregate was created;
        the size of the original type and the record must match }
        the size of the original type and the record must match }
      procedure changetorecord(_def: trecorddef);
      procedure changetorecord(_def: trecorddef);
@@ -268,6 +269,10 @@ type
      function aggregate_kind(def: tdef): ttypedconstkind; virtual;
      function aggregate_kind(def: tdef): ttypedconstkind; virtual;
      { finalize the asmlist: add the necessary symbols etc }
      { finalize the asmlist: add the necessary symbols etc }
      procedure finalize_asmlist(sym: tasmsymbol; def: tdef; section: TAsmSectiontype; const secname: TSymStr; alignment: shortint; const options: ttcasmlistoptions); virtual;
      procedure finalize_asmlist(sym: tasmsymbol; def: tdef; section: TAsmSectiontype; const secname: TSymStr; alignment: shortint; const options: ttcasmlistoptions); virtual;
+     procedure finalize_asmlist_add_indirect_sym(sym: tasmsymbol; def: tdef; section: TAsmSectiontype; const secname: TSymStr; alignment: shortint; const options: ttcasmlistoptions); virtual;
+     { prepare finalization (common for the default and overridden versions) }
+     procedure finalize_asmlist_prepare(const options: ttcasmlistoptions; var alignment: shortint);
+
      { functionality of the above for vectorized dead strippable sections }
      { functionality of the above for vectorized dead strippable sections }
      procedure finalize_vectorized_dead_strip_asmlist(def: tdef; const basename, itemname: TSymStr; st: tsymtable; alignment: shortint; options: ttcasmlistoptions); virtual;
      procedure finalize_vectorized_dead_strip_asmlist(def: tdef; const basename, itemname: TSymStr; st: tsymtable; alignment: shortint; options: ttcasmlistoptions); virtual;
 
 
@@ -346,6 +351,12 @@ type
      { emits a tasmlabofs as returned by emit_*string_const }
      { emits a tasmlabofs as returned by emit_*string_const }
      procedure emit_string_offset(const ll: tasmlabofs; const strlength: longint; const st: tstringtype; const winlikewidestring: boolean; const charptrdef: tdef);virtual;
      procedure emit_string_offset(const ll: tasmlabofs; const strlength: longint; const st: tstringtype; const winlikewidestring: boolean; const charptrdef: tdef);virtual;
 
 
+     { emits a tasmlabofs as returned by begin_dynarray_const }
+     procedure emit_dynarray_offset(const ll:tasmlabofs;const arrlength:asizeint;const arrdef:tarraydef; const arrconstdatadef: trecorddef);virtual;
+     { starts a dynamic array constant so that its data can be emitted directly afterwards }
+     function begin_dynarray_const(arrdef:tdef;var startlab:tasmlabel;out arrlengthloc:ttypedconstplaceholder):tasmlabofs;virtual;
+     function end_dynarray_const(arrdef:tdef;arrlength:asizeint;arrlengthloc:ttypedconstplaceholder):tdef;virtual;
+
      { emit a shortstring constant, and return its def }
      { emit a shortstring constant, and return its def }
      function emit_shortstring_const(const str: shortstring): tdef;
      function emit_shortstring_const(const str: shortstring): tdef;
      { emit a pchar string constant (the characters, not a pointer to them), and return its def }
      { emit a pchar string constant (the characters, not a pointer to them), and return its def }
@@ -357,6 +368,9 @@ type
      { emit an ordinal constant }
      { emit an ordinal constant }
      procedure emit_ord_const(value: int64; def: tdef);
      procedure emit_ord_const(value: int64; def: tdef);
 
 
+     { emit a reference to a pooled shortstring constant }
+     procedure emit_pooled_shortstring_const_ref(const str:shortstring);
+
      { begin a potential aggregate type. Must be called for any type
      { begin a potential aggregate type. Must be called for any type
        that consists of multiple tai constant data entries, or that
        that consists of multiple tai constant data entries, or that
        represents an aggregate at the Pascal level (a record, a non-dynamic
        represents an aggregate at the Pascal level (a record, a non-dynamic
@@ -379,7 +393,7 @@ type
         maxcrecordalign: specify maximum C record alignment (no equivalent in
         maxcrecordalign: specify maximum C record alignment (no equivalent in
           source code)
           source code)
      }
      }
-     function begin_anonymous_record(const optionalname: string; packrecords, recordalign, recordalignmin, maxcrecordalign: shortint): trecorddef; virtual;
+     function begin_anonymous_record(const optionalname: string; packrecords, recordalign, recordalignmin: shortint): trecorddef; virtual;
      function end_anonymous_record: trecorddef; virtual;
      function end_anonymous_record: trecorddef; virtual;
 
 
      { add a placeholder element at the current position that later can be
      { add a placeholder element at the current position that later can be
@@ -442,6 +456,12 @@ type
        supported this is equal to the header size }
        supported this is equal to the header size }
      class function get_string_symofs(typ: tstringtype; winlikewidestring: boolean): pint; virtual;
      class function get_string_symofs(typ: tstringtype; winlikewidestring: boolean): pint; virtual;
 
 
+     { returns the offset of the array data relative to dynamic array constant
+       labels. On most platforms, this is 0 (with the header at a negative
+       offset), but on some platforms such negative offsets are not supported
+       and thus this is equal to the header size }
+     class function get_dynarray_symofs:pint;virtual;
+
      { set the fieldvarsym whose data we will emit next; needed
      { set the fieldvarsym whose data we will emit next; needed
        in case of variant records, so we know which part of the variant gets
        in case of variant records, so we know which part of the variant gets
        initialised. Also in case of objects, because the fieldvarsyms are spread
        initialised. Also in case of objects, because the fieldvarsyms are spread
@@ -451,9 +471,10 @@ type
        record (also if that field is a nested anonymous record) }
        record (also if that field is a nested anonymous record) }
      property next_field_name: TIDString write set_next_field_name;
      property next_field_name: TIDString write set_next_field_name;
     protected
     protected
-     { this one always return the actual offset, called by the above (and
+     { these ones always return the actual offset, called by the above (and
        overridden versions) }
        overridden versions) }
      class function get_string_header_size(typ: tstringtype; winlikewidestring: boolean): pint;
      class function get_string_header_size(typ: tstringtype; winlikewidestring: boolean): pint;
+     class function get_dynarray_header_size:pint;
    end;
    end;
    ttai_typedconstbuilderclass = class of ttai_typedconstbuilder;
    ttai_typedconstbuilderclass = class of ttai_typedconstbuilder;
 
 
@@ -491,7 +512,7 @@ implementation
      cutils,
      cutils,
      verbose,globals,systems,widestr,
      verbose,globals,systems,widestr,
      fmodule,
      fmodule,
-     symtable,defutil;
+     symtable,symutil,defutil;
 
 
 {****************************************************************************
 {****************************************************************************
                        taggregateinformation
                        taggregateinformation
@@ -568,8 +589,7 @@ implementation
             repeat
             repeat
               inc(i);
               inc(i);
               sym:=tsym(tabstractrecorddef(def).symtable.symlist[i]);
               sym:=tsym(tabstractrecorddef(def).symtable.symlist[i]);
-            until (sym.typ=fieldvarsym) and
-              not(sp_static in sym.symoptions);
+            until is_normal_fieldvarsym(sym);
             curfield:=tfieldvarsym(sym);
             curfield:=tfieldvarsym(sym);
             nextoffset:=curfield.fieldoffset;
             nextoffset:=curfield.fieldoffset;
             curindex:=i;
             curindex:=i;
@@ -619,13 +639,20 @@ implementation
       end;
       end;
 
 
 
 
-   constructor tai_simpletypedconst.create(_adetyp: ttypedconstkind; _def: tdef; _val: tai);
+   constructor tai_simpletypedconst.create(_def: tdef; _val: tai);
      begin
      begin
-       inherited create(_adetyp,_def);
+       inherited create(tck_simple,_def);
        fval:=_val;
        fval:=_val;
      end;
      end;
 
 
 
 
+   destructor tai_simpletypedconst.destroy;  { frees the wrapped tai together with this node }
+     begin
+       fval.free;  { fval holds the _val passed to create; NOTE(review): this means the node owns that tai - confirm no caller frees or reuses it separately }
+       inherited destroy;
+     end;
+
+
 {****************************************************************************
 {****************************************************************************
               tai_aggregatetypedconst.tadeenumerator
               tai_aggregatetypedconst.tadeenumerator
  ****************************************************************************}
  ****************************************************************************}
@@ -682,7 +709,7 @@ implementation
        { the "nil" def will be replaced with an array def of the appropriate
        { the "nil" def will be replaced with an array def of the appropriate
          size once we're finished adding data, so we don't create intermediate
          size once we're finished adding data, so we don't create intermediate
          arraydefs all the time }
          arraydefs all the time }
-       fvalues.add(tai_simpletypedconst.create(tck_simple,nil,newstr));
+       fvalues.add(tai_simpletypedconst.create(nil,newstr));
      end;
      end;
 
 
    procedure tai_aggregatetypedconst.add_to_string(strtai: tai_string; othertai: tai);
    procedure tai_aggregatetypedconst.add_to_string(strtai: tai_string; othertai: tai);
@@ -716,7 +743,7 @@ implementation
      begin
      begin
        inherited;
        inherited;
        fisstring:=false;
        fisstring:=false;
-       fvalues:=tfplist.create;
+       fvalues:=tfpobjectlist.create(true);
      end;
      end;
 
 
 
 
@@ -766,9 +793,9 @@ implementation
      end;
      end;
 
 
 
 
-   function tai_aggregatetypedconst.replacevalueatpos(val: tai_abstracttypedconst; pos: longint): tai_abstracttypedconst;
+   procedure tai_aggregatetypedconst.replacevalueatpos(val: tai_abstracttypedconst; pos: longint);
      begin
      begin
-       result:=tai_abstracttypedconst(fvalues[pos]);
+       { since fvalues owns its elements, it will automatically free the old value }
        fvalues[pos]:=val;
        fvalues[pos]:=val;
      end;
      end;
 
 
@@ -910,13 +937,7 @@ implementation
      end;
      end;
 
 
 
 
-   procedure ttai_typedconstbuilder.finalize_asmlist(sym: tasmsymbol; def: tdef; section: TAsmSectiontype; const secname: TSymStr; alignment: shortint; const options: ttcasmlistoptions);
-     var
-       prelist: tasmlist;
-       ptrdef : tdef;
-       symind : tasmsymbol;
-       indtcb : ttai_typedconstbuilder;
-       indsecname : tsymstr;
+   procedure ttai_typedconstbuilder.finalize_asmlist_prepare(const options: ttcasmlistoptions; var alignment: shortint);
      begin
      begin
        if tcalo_apply_constalign in options then
        if tcalo_apply_constalign in options then
          alignment:=const_align(alignment);
          alignment:=const_align(alignment);
@@ -932,7 +953,14 @@ implementation
              tcalo_vectorized_dead_strip_end]*options)<>[]) and
              tcalo_vectorized_dead_strip_end]*options)<>[]) and
           not fvectorized_finalize_called then
           not fvectorized_finalize_called then
          internalerror(2015110602);
          internalerror(2015110602);
+     end;
+
 
 
+   procedure ttai_typedconstbuilder.finalize_asmlist(sym: tasmsymbol; def: tdef; section: TAsmSectiontype; const secname: TSymStr; alignment: shortint; const options: ttcasmlistoptions);
+     var
+       prelist: tasmlist;
+     begin
+       finalize_asmlist_prepare(options, alignment);
        prelist:=tasmlist.create;
        prelist:=tasmlist.create;
        { only now add items based on the symbolname, because it may be
        { only now add items based on the symbolname, because it may be
          modified by the "section" specifier in case of a typed constant }
          modified by the "section" specifier in case of a typed constant }
@@ -949,7 +977,14 @@ implementation
            new_section(prelist,section,secname,alignment);
            new_section(prelist,section,secname,alignment);
          end
          end
        else if tcalo_new_section in options then
        else if tcalo_new_section in options then
-         new_section(prelist,section,secname,alignment)
+         begin
+           { insert ait_cutobject for smart-linking on targets
+             that do not support smartlinking based on sections,
+             like msdos }
+           if not (tf_smartlink_sections in target_info.flags) then
+             maybe_new_object_file(prelist);
+           new_section(prelist,section,secname,alignment);
+         end
        else
        else
          prelist.concat(cai_align.Create(alignment));
          prelist.concat(cai_align.Create(alignment));
 
 
@@ -987,11 +1022,19 @@ implementation
        fasmlist.concat(tai_symbol_end.Createname(sym.name));
        fasmlist.concat(tai_symbol_end.Createname(sym.name));
        { free the temporary list }
        { free the temporary list }
        prelist.free;
        prelist.free;
+     end;
+
 
 
+   procedure ttai_typedconstbuilder.finalize_asmlist_add_indirect_sym(sym: tasmsymbol; def: tdef; section: TAsmSectiontype; const secname: TSymStr; alignment: shortint; const options: ttcasmlistoptions);
+     var
+       ptrdef : tdef;
+       symind : tasmsymbol;
+       indtcb : ttai_typedconstbuilder;
+       indsecname : tsymstr;
+     begin
        if (tcalo_data_force_indirect in options) and
        if (tcalo_data_force_indirect in options) and
-           not fvectorized_finalize_called and
-           (sym.bind in [AB_GLOBAL,AB_COMMON]) and
-           (sym.typ=AT_DATA) then
+          (sym.bind in [AB_GLOBAL,AB_COMMON]) and
+          (sym.typ=AT_DATA) then
          begin
          begin
            ptrdef:=cpointerdef.getreusable(def);
            ptrdef:=cpointerdef.getreusable(def);
            symind:=current_asmdata.DefineAsmSymbol(sym.name,AB_INDIRECT,AT_DATA,ptrdef);
            symind:=current_asmdata.DefineAsmSymbol(sym.name,AB_INDIRECT,AT_DATA,ptrdef);
@@ -1067,6 +1110,7 @@ implementation
              secname:=make_mangledname(basename,st,'2_'+itemname);
              secname:=make_mangledname(basename,st,'2_'+itemname);
            exclude(options,tcalo_vectorized_dead_strip_item);
            exclude(options,tcalo_vectorized_dead_strip_item);
          end;
          end;
+       current_module.linkorderedsymbols.concat(sym.Name);
        finalize_asmlist(sym,def,sectype,secname,alignment,options);
        finalize_asmlist(sym,def,sectype,secname,alignment,options);
      end;
      end;
 
 
@@ -1083,6 +1127,7 @@ implementation
        if not fasmlist_finalized then
        if not fasmlist_finalized then
          begin
          begin
            finalize_asmlist(sym,def,section,secname,alignment,foptions);
            finalize_asmlist(sym,def,section,secname,alignment,foptions);
+           finalize_asmlist_add_indirect_sym(sym,def,section,secname,alignment,foptions);
            fasmlist_finalized:=true;
            fasmlist_finalized:=true;
          end;
          end;
        result:=fasmlist;
        result:=fasmlist;
@@ -1110,6 +1155,16 @@ implementation
      end;
      end;
 
 
 
 
+   class function ttai_typedconstbuilder.get_dynarray_symofs:pint;
+     begin
+       { darwin's linker does not support negative offsets, so on darwin the
+         returned offset is the complete dynarray header size; on every other
+         target the returned offset is 0 }
+       if not (target_info.system in systems_darwin) then
+         result:=0
+       else
+         result:=get_dynarray_header_size;
+     end;
+
+
    class function ttai_typedconstbuilder.get_string_header_size(typ: tstringtype; winlikewidestring: boolean): pint;
    class function ttai_typedconstbuilder.get_string_header_size(typ: tstringtype; winlikewidestring: boolean): pint;
      var
      var
        ansistring_header_size: pint;
        ansistring_header_size: pint;
@@ -1145,6 +1200,16 @@ implementation
      end;
      end;
 
 
 
 
+   class function ttai_typedconstbuilder.get_dynarray_header_size:pint;
+     begin
+       result:=
+         { reference count (emitted with ptrsinttype by begin_dynarray_const) }
+         ptrsinttype.size +
+         { high value (emitted with sizesinttype by begin_dynarray_const) }
+         sizesinttype.size;
+     end;
+
+
    constructor ttai_typedconstbuilder.create(const options: ttcasmlistoptions);
    constructor ttai_typedconstbuilder.create(const options: ttcasmlistoptions);
      begin
      begin
        inherited create;
        inherited create;
@@ -1331,7 +1396,7 @@ implementation
        result.ofs:=0;
        result.ofs:=0;
        { pack the data, so that we don't add unnecessary null bytes after the
        { pack the data, so that we don't add unnecessary null bytes after the
          constant string }
          constant string }
-       begin_anonymous_record('$'+get_dynstring_rec_name(stringtype,false,len),1,sizeof(TConstPtrUInt),1,1);
+       begin_anonymous_record('$'+get_dynstring_rec_name(stringtype,false,len),1,sizeof(TConstPtrUInt),1);
        string_symofs:=get_string_symofs(stringtype,false);
        string_symofs:=get_string_symofs(stringtype,false);
        { encoding }
        { encoding }
        emit_tai(tai_const.create_16bit(encoding),u16inttype);
        emit_tai(tai_const.create_16bit(encoding),u16inttype);
@@ -1543,7 +1608,7 @@ implementation
        if (typ<>st_widestring) or
        if (typ<>st_widestring) or
           not winlike then
           not winlike then
          begin
          begin
-           result:=crecorddef.create_global_internal('$'+name,1,1,1);
+           result:=crecorddef.create_global_internal('$'+name,1,1);
            { encoding }
            { encoding }
            result.add_field_by_def('',u16inttype);
            result.add_field_by_def('',u16inttype);
            { element size }
            { element size }
@@ -1569,8 +1634,7 @@ implementation
        else
        else
          begin
          begin
            result:=crecorddef.create_global_internal('$'+name,4,
            result:=crecorddef.create_global_internal('$'+name,4,
-             targetinfos[target_info.system]^.alignment.recordalignmin,
-             targetinfos[target_info.system]^.alignment.maxCrecordalign);
+             targetinfos[target_info.system]^.alignment.recordalignmin);
            { length in bytes }
            { length in bytes }
            result.add_field_by_def('',s32inttype);
            result.add_field_by_def('',s32inttype);
            streledef:=cwidechartype;
            streledef:=cwidechartype;
@@ -1601,7 +1665,7 @@ implementation
        datatcb.emit_tai(tai_string.create_pchar(s,len+1),datadef);
        datatcb.emit_tai(tai_string.create_pchar(s,len+1),datadef);
        datatcb.maybe_end_aggregate(datadef);
        datatcb.maybe_end_aggregate(datadef);
        ansistrrecdef:=datatcb.end_anonymous_record;
        ansistrrecdef:=datatcb.end_anonymous_record;
-       finish_internal_data_builder(datatcb,startlab,ansistrrecdef,const_align(sizeof(pointer)));
+       finish_internal_data_builder(datatcb,startlab,ansistrrecdef,const_align(voidpointertype.alignment));
      end;
      end;
 
 
 
 
@@ -1621,8 +1685,7 @@ implementation
            result.lab:=startlab;
            result.lab:=startlab;
            datatcb.begin_anonymous_record('$'+get_dynstring_rec_name(st_widestring,true,strlength),
            datatcb.begin_anonymous_record('$'+get_dynstring_rec_name(st_widestring,true,strlength),
              4,4,
              4,4,
-             targetinfos[target_info.system]^.alignment.recordalignmin,
-             targetinfos[target_info.system]^.alignment.maxCrecordalign);
+             targetinfos[target_info.system]^.alignment.recordalignmin);
            datatcb.emit_tai(Tai_const.Create_32bit(strlength*cwidechartype.size),s32inttype);
            datatcb.emit_tai(Tai_const.Create_32bit(strlength*cwidechartype.size),s32inttype);
            { can we optimise by placing the string constant label at the
            { can we optimise by placing the string constant label at the
              required offset? }
              required offset? }
@@ -1657,7 +1720,7 @@ implementation
        else
        else
          { code generation for other sizes must be written }
          { code generation for other sizes must be written }
          internalerror(200904271);
          internalerror(200904271);
-       finish_internal_data_builder(datatcb,startlab,unicodestrrecdef,const_align(sizeof(pint)));
+       finish_internal_data_builder(datatcb,startlab,unicodestrrecdef,const_align(voidpointertype.alignment));
      end;
      end;
 
 
 
 
@@ -1667,6 +1730,52 @@ implementation
      end;
      end;
 
 
 
 
+   procedure ttai_typedconstbuilder.emit_dynarray_offset(const ll:tasmlabofs;const arrlength:asizeint;const arrdef:tarraydef; const arrconstdatadef: trecorddef);
+     begin { emits a symbol+offset constant (ll.lab+ll.ofs) typed as arrdef; arrlength and arrconstdatadef are not used in this implementation }
+       emit_tai(tai_const.create_sym_offset(ll.lab,ll.ofs),arrdef);
+     end;
+
+
+   function ttai_typedconstbuilder.begin_dynarray_const(arrdef:tdef;var startlab:tasmlabel;out arrlengthloc:ttypedconstplaceholder):tasmlabofs;
+     var
+       dynarray_symofs: asizeint;
+     begin
+       result.lab:=startlab;
+       result.ofs:=0;
+       { pack the data, so that we don't add unnecessary null bytes after the
+         constant array }
+       begin_anonymous_record('',1,sizeof(TConstPtrUInt),1);
+       dynarray_symofs:=get_dynarray_symofs;
+       { reference count field of the header, emitted as -1.
+         what to do if ptrsinttype <> sizesinttype??? }
+       emit_tai(tai_const.create_sizeint(-1),ptrsinttype);
+       inc(result.ofs,ptrsinttype.size);
+       arrlengthloc:=emit_placeholder(sizesinttype); { high value field; filled in by end_dynarray_const }
+       inc(result.ofs,sizesinttype.size);
+       if dynarray_symofs=0 then
+         begin
+           { results in slightly more efficient code: the returned label is
+             placed directly after the header, so the returned offset is 0 }
+           emit_tai(tai_label.create(result.lab),arrdef);
+           result.ofs:=0;
+           { create new label of the same kind (including whether or not the
+             name starts with target_asm.labelprefix in case it's AB_LOCAL,
+             so we keep the difference depending on whether the original was
+             allocated via getstatic/getlocal/getglobal datalabel) }
+           startlab:=tasmlabel.create(current_asmdata.AsmSymbolDict,startlab.name+'$dynarrlab',startlab.bind,startlab.typ);
+         end;
+       { sanity check: result.ofs is now either 0 (label emitted after the
+         header) or the accumulated header size, and has to match the value
+         returned by get_dynarray_symofs }
+       if result.ofs<>dynarray_symofs then
+         internalerror(2018020601);
+     end;
+
+
+   function ttai_typedconstbuilder.end_dynarray_const(arrdef:tdef;arrlength:asizeint;arrlengthloc:ttypedconstplaceholder):tdef;
+     begin
+       { we emit the high value, not the count: the placeholder is replaced
+         by arrlength-1; arrdef is not used here }
+       arrlengthloc.replace(tai_const.Create_sizeint(arrlength-1),sizesinttype);
+       result:=end_anonymous_record; { closes the anonymous record and returns its def }
+     end;
+
+
    function ttai_typedconstbuilder.emit_shortstring_const(const str: shortstring): tdef;
    function ttai_typedconstbuilder.emit_shortstring_const(const str: shortstring): tdef;
      begin
      begin
        { we use an arraydef instead of a shortstringdef, because we don't have
        { we use an arraydef instead of a shortstringdef, because we don't have
@@ -1752,6 +1861,56 @@ implementation
      end;
      end;
 
 
 
 
+   procedure ttai_typedconstbuilder.emit_pooled_shortstring_const_ref(const str:shortstring);
+     var
+       pool : thashset;
+       entry : phashsetitem;
+       strlab : tasmlabel;
+       l : longint;
+       pc : pansichar;
+       datadef : tdef;
+       strtcb : ttai_typedconstbuilder;
+     begin
+       pool:=current_asmdata.ConstPools[sp_shortstr];
+
+       entry:=pool.FindOrAdd(@str[1],length(str));
+
+       { :-(, we must generate a new entry: the pool item has no label
+         attached to it yet }
+       if not assigned(entry^.Data) then
+         begin
+           current_asmdata.getglobaldatalabel(strlab);
+
+           { include length and terminating zero for quick conversion to pchar:
+             pc[0] is the length byte, pc[1..l] the characters and pc[l+1]
+             the terminating #0 }
+           l:=length(str);
+           getmem(pc,l+2);
+           move(str[1],pc[1],l);
+           pc[0]:=chr(l);
+           pc[l+1]:=#0;
+
+           datadef:=carraydef.getreusable(cansichartype,l+2);
+
+           { we start a new constbuilder as we don't know whether we're called
+             from inside an internal constbuilder }
+           strtcb:=ctai_typedconstbuilder.create([tcalo_is_lab,tcalo_make_dead_strippable,tcalo_apply_constalign]);
+
+           strtcb.maybe_begin_aggregate(datadef);
+           strtcb.emit_tai(Tai_string.Create_pchar(pc,l+2),datadef);
+           strtcb.maybe_end_aggregate(datadef);
+
+           current_asmdata.asmlists[al_typedconsts].concatList(
+             strtcb.get_final_asmlist(strlab,datadef,sec_rodata_norel,strlab.name,const_align(sizeof(pint)))
+           );
+           strtcb.free;
+
+           entry^.Data:=strlab; { remember the label so identical strings reuse it (see the else branch) }
+         end
+       else
+         strlab:=tasmlabel(entry^.Data);
+
+       emit_tai(tai_const.Create_sym(strlab),charpointertype); { the reference itself, typed as charpointertype }
+     end;
+
+
    procedure ttai_typedconstbuilder.maybe_begin_aggregate(def: tdef);
    procedure ttai_typedconstbuilder.maybe_begin_aggregate(def: tdef);
      begin
      begin
        begin_aggregate_internal(def,false);
        begin_aggregate_internal(def,false);
@@ -1764,7 +1923,7 @@ implementation
      end;
      end;
 
 
 
 
-   function ttai_typedconstbuilder.begin_anonymous_record(const optionalname: string; packrecords, recordalign, recordalignmin, maxcrecordalign: shortint): trecorddef;
+   function ttai_typedconstbuilder.begin_anonymous_record(const optionalname: string; packrecords, recordalign, recordalignmin: shortint): trecorddef;
      var
      var
        anonrecorddef: trecorddef;
        anonrecorddef: trecorddef;
        typesym: ttypesym;
        typesym: ttypesym;
@@ -1785,7 +1944,7 @@ implementation
              end;
              end;
          end;
          end;
        { create skeleton def }
        { create skeleton def }
-       anonrecorddef:=crecorddef.create_global_internal(optionalname,packrecords,recordalignmin,maxcrecordalign);
+       anonrecorddef:=crecorddef.create_global_internal(optionalname,packrecords,recordalignmin);
        trecordsymtable(anonrecorddef.symtable).recordalignment:=recordalign;
        trecordsymtable(anonrecorddef.symtable).recordalignment:=recordalign;
        { generic aggregate housekeeping }
        { generic aggregate housekeeping }
        begin_aggregate_internal(anonrecorddef,true);
        begin_aggregate_internal(anonrecorddef,true);
@@ -1924,7 +2083,7 @@ implementation
          begin
          begin
            sym:=search_struct_member_no_helper(tabstractrecorddef(curdef),fields[i]);
            sym:=search_struct_member_no_helper(tabstractrecorddef(curdef),fields[i]);
            if not assigned(sym) or
            if not assigned(sym) or
-              (sym.typ<>fieldvarsym) or
+              not is_normal_fieldvarsym(sym) or
               ((i<>high(fields)) and
               ((i<>high(fields)) and
                not(tfieldvarsym(sym).vardef.typ in [objectdef,recorddef])) then
                not(tfieldvarsym(sym).vardef.typ in [objectdef,recorddef])) then
              internalerror(2015071505);
              internalerror(2015071505);
@@ -2040,7 +2199,7 @@ implementation
    procedure tlowleveltypedconstplaceholder.replace(ai: tai; d: tdef);
    procedure tlowleveltypedconstplaceholder.replace(ai: tai; d: tdef);
      begin
      begin
        if d<>def then
        if d<>def then
-         internalerror(2015091001);
+         internalerror(2015091007);
        list.insertafter(ai,insertpos);
        list.insertafter(ai,insertpos);
        list.remove(insertpos);
        list.remove(insertpos);
        insertpos.free;
        insertpos.free;

+ 112 - 4
compiler/aasmdata.pas

@@ -96,7 +96,8 @@ interface
          sp_objcprotocolrefs,
          sp_objcprotocolrefs,
          sp_varsets,
          sp_varsets,
          sp_floats,
          sp_floats,
-         sp_guids
+         sp_guids,
+         sp_paraloc
       );
       );
       
       
     const
     const
@@ -134,6 +135,22 @@ interface
          section_count : longint;
          section_count : longint;
          constructor create;
          constructor create;
          function  getlasttaifilepos : pfileposinfo;
          function  getlasttaifilepos : pfileposinfo;
+         { inserts another List at the begin and make this List empty }
+         procedure insertList(p : TLinkedList); override;
+         { inserts another List before the provided item and make this List empty }
+         procedure insertListBefore(Item:TLinkedListItem;p : TLinkedList); override;
+         { inserts another List after the provided item and make this List empty }
+         procedure insertListAfter(Item:TLinkedListItem;p : TLinkedList); override;
+         { concats another List at the end and make this List empty }
+         procedure concatList(p : TLinkedList); override;
+         { concats another List at the start and makes a copy
+           the list is ordered in reverse.
+         }
+         procedure insertListcopy(p : TLinkedList); override;
+         { concats another List at the end and makes a copy }
+         procedure concatListcopy(p : TLinkedList); override;
+         { removes all items from the list, the items are not freed }
+         procedure RemoveAll; override;
       end;
       end;
 
 
       TAsmCFI=class
       TAsmCFI=class
@@ -143,10 +160,13 @@ interface
         procedure generate_code(list:TAsmList);virtual;
         procedure generate_code(list:TAsmList);virtual;
         procedure start_frame(list:TAsmList);virtual;
         procedure start_frame(list:TAsmList);virtual;
         procedure end_frame(list:TAsmList);virtual;
         procedure end_frame(list:TAsmList);virtual;
+        procedure outmost_frame(list:TAsmList);virtual;
         procedure cfa_offset(list:TAsmList;reg:tregister;ofs:longint);virtual;
         procedure cfa_offset(list:TAsmList;reg:tregister;ofs:longint);virtual;
         procedure cfa_restore(list:TAsmList;reg:tregister);virtual;
         procedure cfa_restore(list:TAsmList;reg:tregister);virtual;
         procedure cfa_def_cfa_register(list:TAsmList;reg:tregister);virtual;
         procedure cfa_def_cfa_register(list:TAsmList;reg:tregister);virtual;
         procedure cfa_def_cfa_offset(list:TAsmList;ofs:longint);virtual;
         procedure cfa_def_cfa_offset(list:TAsmList;ofs:longint);virtual;
+        function get_frame_start: TAsmLabel;virtual;
+        function get_cfa_list : TAsmList;virtual;
       end;
       end;
       TAsmCFIClass=class of TAsmCFI;
       TAsmCFIClass=class of TAsmCFI;
 
 
@@ -178,6 +198,7 @@ interface
         { asmsymbol }
         { asmsymbol }
         function  DefineAsmSymbolByClass(symclass: TAsmSymbolClass; const s : TSymStr;_bind:TAsmSymBind;_typ:Tasmsymtype; def: tdef) : TAsmSymbol; virtual;
         function  DefineAsmSymbolByClass(symclass: TAsmSymbolClass; const s : TSymStr;_bind:TAsmSymBind;_typ:Tasmsymtype; def: tdef) : TAsmSymbol; virtual;
         function  DefineAsmSymbol(const s : TSymStr;_bind:TAsmSymBind;_typ:Tasmsymtype; def: tdef) : TAsmSymbol;
         function  DefineAsmSymbol(const s : TSymStr;_bind:TAsmSymBind;_typ:Tasmsymtype; def: tdef) : TAsmSymbol;
+        function  DefineProcAsmSymbol(pd: tdef; const s: TSymStr; global: boolean): TAsmSymbol;
         function  WeakRefAsmSymbol(const s : TSymStr;_typ:Tasmsymtype) : TAsmSymbol;
         function  WeakRefAsmSymbol(const s : TSymStr;_typ:Tasmsymtype) : TAsmSymbol;
         function  RefAsmSymbol(const s : TSymStr;_typ:Tasmsymtype;indirect:boolean=false) : TAsmSymbol;
         function  RefAsmSymbol(const s : TSymStr;_typ:Tasmsymtype;indirect:boolean=false) : TAsmSymbol;
         function  GetAsmSymbol(const s : TSymStr) : TAsmSymbol;
         function  GetAsmSymbol(const s : TSymStr) : TAsmSymbol;
@@ -228,6 +249,7 @@ implementation
 
 
     uses
     uses
       verbose,
       verbose,
+      globals,
       symconst,
       symconst,
       aasmtai;
       aasmtai;
 
 
@@ -268,6 +290,11 @@ implementation
       end;
       end;
 
 
 
 
+    procedure TAsmCFI.outmost_frame(list: TAsmList);
+      begin { does nothing in TAsmCFI itself; the method is declared virtual }
+      end;
+
+
     procedure TAsmCFI.cfa_offset(list:TAsmList;reg:tregister;ofs:longint);
     procedure TAsmCFI.cfa_offset(list:TAsmList;reg:tregister;ofs:longint);
       begin
       begin
       end;
       end;
@@ -287,6 +314,18 @@ implementation
       begin
       begin
       end;
       end;
 
 
+
+    function TAsmCFI.get_frame_start: TAsmLabel;
+      begin
+        Result:=nil; { TAsmCFI itself provides no frame start label; the method is declared virtual }
+      end;
+
+
+    function TAsmCFI.get_cfa_list: TAsmList;
+      begin
+        Result:=nil; { TAsmCFI itself provides no CFA list; the method is declared virtual }
+      end;
+
 {*****************************************************************************
 {*****************************************************************************
                                  TTCInitItem
                                  TTCInitItem
 *****************************************************************************}
 *****************************************************************************}
@@ -337,6 +376,59 @@ implementation
       end;
       end;
 
 
 
 
+    procedure TAsmList.insertList(p : TLinkedList);
+      begin
+        inherited insertList(p);
+        inc(section_count,TAsmList(p).section_count); { unchecked cast: p is assumed to be a TAsmList }
+        TAsmList(p).section_count:=0; { the count has been transferred to this list }
+      end;
+
+
+    procedure TAsmList.insertListBefore(Item : TLinkedListItem; p : TLinkedList);
+      begin
+        inherited insertListBefore(Item,p);
+        inc(section_count,TAsmList(p).section_count); { unchecked cast: p is assumed to be a TAsmList }
+        TAsmList(p).section_count:=0; { the count has been transferred to this list }
+      end;
+
+
+    procedure TAsmList.insertListAfter(Item : TLinkedListItem; p : TLinkedList);
+      begin
+        inherited insertListAfter(Item,p);
+        inc(section_count,TAsmList(p).section_count); { unchecked cast: p is assumed to be a TAsmList }
+        TAsmList(p).section_count:=0; { the count has been transferred to this list }
+      end;
+
+
+    procedure TAsmList.concatList(p : TLinkedList);
+      begin
+        inherited concatList(p);
+        inc(section_count,TAsmList(p).section_count); { unchecked cast: p is assumed to be a TAsmList }
+        TAsmList(p).section_count:=0; { the count has been transferred to this list }
+      end;
+
+
+    procedure TAsmList.insertListcopy(p : TLinkedList);
+      begin
+        inherited insertListcopy(p);
+        inc(section_count,TAsmList(p).section_count); { unchecked cast; p's own count is left untouched because only a copy is inserted }
+     end;
+
+
+    procedure TAsmList.concatListcopy(p : TLinkedList);
+      begin
+        inherited concatListcopy(p);
+        inc(section_count,TAsmList(p).section_count); { unchecked cast; p's own count is left untouched because only a copy is appended }
+      end;
+
+
+    procedure TAsmList.RemoveAll;
+      begin
+         inherited RemoveAll;
+         section_count:=0; { reset the section counter together with the list contents }
+      end;
+
+
 {****************************************************************************
 {****************************************************************************
                                 TAsmData
                                 TAsmData
 ****************************************************************************}
 ****************************************************************************}
@@ -423,8 +515,8 @@ implementation
         CurrAsmList:=TAsmList.create;
         CurrAsmList:=TAsmList.create;
         for hal:=low(TAsmListType) to high(TAsmListType) do
         for hal:=low(TAsmListType) to high(TAsmListType) do
           AsmLists[hal]:=TAsmList.create;
           AsmLists[hal]:=TAsmList.create;
-        WideInits :=TLinkedList.create;
-        ResStrInits:=TLinkedList.create;
+        WideInits :=TAsmList.create;
+        ResStrInits:=TAsmList.create;
         { CFI }
         { CFI }
         FAsmCFI:=CAsmCFI.Create;
         FAsmCFI:=CAsmCFI.Create;
       end;
       end;
@@ -482,6 +574,21 @@ implementation
       end;
       end;
 
 
 
 
+    function TAsmData.DefineProcAsmSymbol(pd: tdef; const s: TSymStr; global: boolean): TAsmSymbol;
+      begin
+        { Defines the AT_FUNCTION symbol s for pd and returns it. The binding
+          is AB_GLOBAL when global is set or profiling is enabled, otherwise
+          AB_PRIVATE_EXTERN when the target supports hidden symbols, and
+          AB_LOCAL in all remaining cases.
+
+          The condition to use global or local symbol must match
+          the code written in hlcg.gen_proc_symbol to
+          avoid change from AB_LOCAL to AB_GLOBAL, which generates
+          erroneous code (at least for targets using GOT) }
+        if global or
+           (cs_profile in current_settings.moduleswitches) then
+          result:=DefineAsmSymbol(s,AB_GLOBAL,AT_FUNCTION,pd)
+        else if tf_supports_hidden_symbols in target_info.flags then
+          result:=DefineAsmSymbol(s,AB_PRIVATE_EXTERN,AT_FUNCTION,pd)
+        else
+          result:=DefineAsmSymbol(s,AB_LOCAL,AT_FUNCTION,pd);
+      end;
+
     function TAsmData.RefAsmSymbol(const s : TSymStr;_typ:Tasmsymtype;indirect:boolean) : TAsmSymbol;
     function TAsmData.RefAsmSymbol(const s : TSymStr;_typ:Tasmsymtype;indirect:boolean) : TAsmSymbol;
       var
       var
         namestr : TSymStr;
         namestr : TSymStr;
@@ -604,7 +711,8 @@ initialization
   memasmlists:=TMemDebug.create('AsmLists');
   memasmlists:=TMemDebug.create('AsmLists');
   memasmlists.stop;
   memasmlists.stop;
 {$endif MEMDEBUG}
 {$endif MEMDEBUG}
-  CAsmCFI:=TAsmCFI;
+  if not(assigned(CAsmCFI)) then
+    CAsmCFI:=TAsmCFI;
 
 
 finalization
 finalization
 {$ifdef MEMDEBUG}
 {$ifdef MEMDEBUG}

+ 1 - 1
compiler/aasmdef.pas

@@ -56,7 +56,7 @@ function TAsmDataDef.DefineAsmSymbolByClass(symclass: TAsmSymbolClass; const s:
     result:=DefineAsmSymbolByClassBase(symclass,s,_bind,_typ,def,wasdefined);
     result:=DefineAsmSymbolByClassBase(symclass,s,_bind,_typ,def,wasdefined);
     { define the indirect asmsymbol if necessary }
     { define the indirect asmsymbol if necessary }
     if not wasdefined and
     if not wasdefined and
-       (_bind in [AB_GLOBAL,AB_COMMON]) and
+       (_bind in [AB_GLOBAL,AB_COMMON,AB_PRIVATE_EXTERN]) and
        (_typ<>AT_DATA_NOINDIRECT) and
        (_typ<>AT_DATA_NOINDIRECT) and
        (((_typ=AT_DATA) and
        (((_typ=AT_DATA) and
          (tf_supports_packages in target_info.flags) and
          (tf_supports_packages in target_info.flags) and

+ 4 - 0
compiler/aasmsym.pas

@@ -53,6 +53,8 @@ implementation
         case o.typ of
         case o.typ of
           top_local :
           top_local :
             o.localoper^.localsymderef.build(tlocalvarsym(o.localoper^.localsym));
             o.localoper^.localsymderef.build(tlocalvarsym(o.localoper^.localsym));
+          else
+            ;
         end;
         end;
       end;
       end;
 
 
@@ -65,6 +67,8 @@ implementation
             end;
             end;
           top_local :
           top_local :
             o.localoper^.localsym:=tlocalvarsym(o.localoper^.localsymderef.resolve);
             o.localoper^.localsym:=tlocalvarsym(o.localoper^.localsymderef.resolve);
+          else
+            ;
         end;
         end;
       end;
       end;
 
 

+ 375 - 62
compiler/aasmtai.pas

@@ -87,9 +87,15 @@ interface
           ait_llvmins, { llvm instruction }
           ait_llvmins, { llvm instruction }
           ait_llvmalias, { alias for a symbol }
           ait_llvmalias, { alias for a symbol }
           ait_llvmdecl, { llvm symbol declaration (global/external variable, external procdef) }
           ait_llvmdecl, { llvm symbol declaration (global/external variable, external procdef) }
+          ait_llvmmetadatanode, (* llvm metadata node: !id = !{type value, ...} *)
+          ait_llvmmetadatareftypedconst, { reference to metadata inside a metadata constant }
+          ait_llvmmetadatarefoperand, { llvm metadata reference: !metadataname !id }
 {$endif}
 {$endif}
           { SEH directives used in ARM,MIPS and x86_64 COFF targets }
           { SEH directives used in ARM,MIPS and x86_64 COFF targets }
-          ait_seh_directive
+          ait_seh_directive,
+          { Dwarf CFI directive }
+          ait_cfi,
+          ait_eabi_attribute
           );
           );
 
 
         taiconst_type = (
         taiconst_type = (
@@ -145,7 +151,14 @@ interface
           { offset of symbol's GOT slot in GOT }
           { offset of symbol's GOT slot in GOT }
           aitconst_got,
           aitconst_got,
           { offset of symbol itself from GOT }
           { offset of symbol itself from GOT }
-          aitconst_gotoff_symbol
+          aitconst_gotoff_symbol,
+          { offset in TLS block }
+          aitconst_dtpoff,
+          { ARM TLS code }
+          aitconst_gottpoff,
+          aitconst_tpoff,
+          aitconst_tlsgd,
+          aitconst_tlsdesc
         );
         );
 
 
         tairealconsttype = (
         tairealconsttype = (
@@ -216,8 +229,13 @@ interface
           'llvmins',
           'llvmins',
           'llvmalias',
           'llvmalias',
           'llvmdecl',
           'llvmdecl',
+          'llvmmetadata',
+          'llvmmetadatareftc',
+          'llvmmetadatarefop',
 {$endif}
 {$endif}
-          'seh_directive'
+          'seh_directive', { entries follow the enumeration order: ait_seh_directive, ait_cfi, ait_eabi_attribute }
+          'cfi',
+          'eabi_attribute'
           );
           );
 
 
     type
     type
@@ -232,6 +250,7 @@ interface
 {$if defined(arm) or defined(aarch64)}
 {$if defined(arm) or defined(aarch64)}
        ,top_conditioncode
        ,top_conditioncode
        ,top_shifterop
        ,top_shifterop
+       ,top_realconst
 {$endif defined(arm) or defined(aarch64)}
 {$endif defined(arm) or defined(aarch64)}
 {$ifdef m68k}
 {$ifdef m68k}
        { m68k only }
        { m68k only }
@@ -260,7 +279,12 @@ interface
        ,top_cond
        ,top_cond
        ,top_para
        ,top_para
        ,top_asmlist
        ,top_asmlist
+       ,top_callingconvention
 {$endif llvm}
 {$endif llvm}
+{$if defined(riscv32) or defined(riscv64)}
+       ,top_fenceflags
+       ,top_roundingmode
+{$endif defined(riscv32) or defined(riscv64)}
        );
        );
 
 
       { kinds of operations that an instruction can perform on an operand }
       { kinds of operations that an instruction can perform on an operand }
@@ -270,6 +294,9 @@ interface
         localsym : pointer;
         localsym : pointer;
         localsymderef : tderef;
         localsymderef : tderef;
         localsymofs : longint;
         localsymofs : longint;
+{$ifdef x86}
+        localsegment,
+{$endif x86}
         localindexreg : tregister;
         localindexreg : tregister;
         localscale : byte;
         localscale : byte;
         localgetoffset,
         localgetoffset,
@@ -307,8 +334,13 @@ interface
 {$endif JVM}
 {$endif JVM}
 {$ifdef llvm}
 {$ifdef llvm}
                      ait_llvmdecl,
                      ait_llvmdecl,
+                     ait_llvmmetadatanode,
+                     ait_llvmmetadatareftypedconst,
+                     ait_llvmmetadatarefoperand,
 {$endif llvm}
 {$endif llvm}
-                     ait_seh_directive
+                     ait_seh_directive,
+                     ait_cfi,
+                     ait_eabi_attribute
                     ];
                     ];
 
 
 
 
@@ -354,7 +386,11 @@ interface
           available on the specified CPU; this represents directives such as
           available on the specified CPU; this represents directives such as
           NASM's 'CPU 686' or MASM/TASM's '.686p'. Might not be supported by
           NASM's 'CPU 686' or MASM/TASM's '.686p'. Might not be supported by
           all assemblers. }
           all assemblers. }
-        asd_cpu
+        asd_cpu,
+        { for the OMF object format }
+        asd_omf_linnum_line,
+        { RISC-V }
+        asd_option
       );
       );
 
 
       TAsmSehDirective=(
       TAsmSehDirective=(
@@ -362,10 +398,11 @@ interface
           ash_endprologue,ash_handler,ash_handlerdata,
           ash_endprologue,ash_handler,ash_handlerdata,
           ash_eh,ash_32,ash_no32,
           ash_eh,ash_32,ash_no32,
           ash_setframe,ash_stackalloc,ash_pushreg,
           ash_setframe,ash_stackalloc,ash_pushreg,
-          ash_savereg,ash_savexmm,ash_pushframe
+          ash_savereg,ash_savexmm,ash_pushframe,
+          ash_pushnv,ash_savenv
         );
         );
 
 
-      TSymbolPairKind = (spk_set, spk_thumb_set, spk_localentry);
+      TSymbolPairKind = (spk_set, spk_set_global, spk_thumb_set, spk_localentry);
 
 
 
 
     const
     const
@@ -391,17 +428,22 @@ interface
         { ARM }
         { ARM }
         'thumb_func',
         'thumb_func',
         'code',
         'code',
-        'cpu'
+        'cpu',
+        { for the OMF object format }
+        'omf_line',
+        { RISC-V }
+        'option'
       );
       );
       sehdirectivestr : array[TAsmSehDirective] of string[16]=(
       sehdirectivestr : array[TAsmSehDirective] of string[16]=(
         '.seh_proc','.seh_endproc',
         '.seh_proc','.seh_endproc',
         '.seh_endprologue','.seh_handler','.seh_handlerdata',
         '.seh_endprologue','.seh_handler','.seh_handlerdata',
         '.seh_eh','.seh_32','.seh_no32',
         '.seh_eh','.seh_32','.seh_no32',
         '.seh_setframe','.seh_stackalloc','.seh_pushreg',
         '.seh_setframe','.seh_stackalloc','.seh_pushreg',
-        '.seh_savereg','.seh_savexmm','.seh_pushframe'
+        '.seh_savereg','.seh_savexmm','.seh_pushframe',
+        '.pushnv','.savenv'
       );
       );
       symbolpairkindstr: array[TSymbolPairKind] of string[11]=(
       symbolpairkindstr: array[TSymbolPairKind] of string[11]=(
-        '.set', '.thumb_set', '.localentry'
+        '.set', '.set', '.thumb_set', '.localentry'
       );
       );
 
 
     type
     type
@@ -410,6 +452,9 @@ interface
         { please keep the size of this record <=12 bytes and keep it properly aligned }
         { please keep the size of this record <=12 bytes and keep it properly aligned }
         toper = record
         toper = record
           ot : longint;
           ot : longint;
+        {$ifdef x86}
+          vopext: smallint;
+        {$ENDIF}
           case typ : toptype of
           case typ : toptype of
             top_none   : ();
             top_none   : ();
             top_reg    : (reg:tregister);
             top_reg    : (reg:tregister);
@@ -426,6 +471,7 @@ interface
         {$if defined(arm) or defined(aarch64)}
         {$if defined(arm) or defined(aarch64)}
             top_shifterop : (shifterop : pshifterop);
             top_shifterop : (shifterop : pshifterop);
             top_conditioncode : (cc : TAsmCond);
             top_conditioncode : (cc : TAsmCond);
+            top_realconst : (val_real:bestreal);
         {$endif defined(arm) or defined(aarch64)}
         {$endif defined(arm) or defined(aarch64)}
         {$ifdef m68k}
         {$ifdef m68k}
             top_regset : (dataregset,addrregset,fpuregset: tcpuregisterset);
             top_regset : (dataregset,addrregset,fpuregset: tcpuregisterset);
@@ -451,7 +497,12 @@ interface
             top_fpcond : (fpcond: tllvmfpcmp);
             top_fpcond : (fpcond: tllvmfpcmp);
             top_para   : (paras: tfplist);
             top_para   : (paras: tfplist);
             top_asmlist : (asmlist: tasmlist);
             top_asmlist : (asmlist: tasmlist);
+            top_callingconvention: (callingconvention: tproccalloption);
         {$endif llvm}
         {$endif llvm}
+        {$if defined(riscv32) or defined(riscv64)}
+            top_fenceflags : (fenceflags : TFenceFlags);
+            top_roundingmode : (roundingmode : TRoundingMode);
+        {$endif defined(riscv32) or defined(riscv64)}
         end;
         end;
         poper=^toper;
         poper=^toper;
 
 
@@ -509,6 +560,7 @@ interface
           constructor Create_Global(_sym:tasmsymbol;siz:longint);
           constructor Create_Global(_sym:tasmsymbol;siz:longint);
           constructor Createname(const _name : string;_symtyp:Tasmsymtype;siz:longint;def:tdef);
           constructor Createname(const _name : string;_symtyp:Tasmsymtype;siz:longint;def:tdef);
           constructor Createname_global(const _name : string;_symtyp:Tasmsymtype;siz:longint;def:tdef);
           constructor Createname_global(const _name : string;_symtyp:Tasmsymtype;siz:longint;def:tdef);
+          constructor Createname_hidden(const _name : string;_symtyp:Tasmsymtype;siz:longint;def:tdef);
           constructor Createname_global_value(const _name : string;_symtyp:Tasmsymtype;siz:longint;val:ptruint;def:tdef);
           constructor Createname_global_value(const _name : string;_symtyp:Tasmsymtype;siz:longint;val:ptruint;def:tdef);
           constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
           constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
           procedure ppuwrite(ppufile:tcompilerppufile);override;
           procedure ppuwrite(ppufile:tcompilerppufile);override;
@@ -559,14 +611,17 @@ interface
           function getcopy:tlinkedlistitem;override;
           function getcopy:tlinkedlistitem;override;
        end;
        end;
 
 
-
        { Generates a section / segment directive }
        { Generates a section / segment directive }
        tai_section = class(tai)
        tai_section = class(tai)
           sectype  : TAsmSectiontype;
           sectype  : TAsmSectiontype;
           secorder : TasmSectionorder;
           secorder : TasmSectionorder;
-          secalign : byte;
+          secalign : longint;
           name     : pshortstring;
           name     : pshortstring;
-          sec      : TObjSection; { used in binary writer }
+          { used in binary writer }
+          sec      : TObjSection;
+          { used only by ELF so far }
+          secflags : TSectionFlags;
+          secprogbits : TSectionProgbits;
           destructor Destroy;override;
           destructor Destroy;override;
           constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
           constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
           procedure ppuwrite(ppufile:tcompilerppufile);override;
           procedure ppuwrite(ppufile:tcompilerppufile);override;
@@ -574,7 +629,7 @@ interface
          private
          private
           { this constructor is made private on purpose }
           { this constructor is made private on purpose }
           { because sections should be created via new_section() }
           { because sections should be created via new_section() }
-          constructor Create(Asectype:TAsmSectiontype;const Aname:string;Aalign:byte;Asecorder:TasmSectionorder=secorder_default);
+          constructor Create(Asectype:TAsmSectiontype;const Aname:string;Aalign:longint;Asecorder:TasmSectionorder=secorder_default);
 {$pop}
 {$pop}
        end;
        end;
 
 
@@ -584,8 +639,9 @@ interface
           is_global : boolean;
           is_global : boolean;
           sym       : tasmsymbol;
           sym       : tasmsymbol;
           size      : asizeint;
           size      : asizeint;
-          constructor Create(const _name : string;_size : asizeint; def: tdef);
-          constructor Create_global(const _name : string;_size : asizeint; def: tdef);
+          constructor Create(const _name: string; _size: asizeint; def: tdef; _typ: Tasmsymtype);
+          constructor Create_hidden(const _name: string; _size: asizeint; def: tdef; _typ: Tasmsymtype);
+          constructor Create_global(const _name: string; _size: asizeint; def: tdef; _typ: Tasmsymtype);
           constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
           constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
           procedure ppuwrite(ppufile:tcompilerppufile);override;
           procedure ppuwrite(ppufile:tcompilerppufile);override;
           procedure derefimpl;override;
           procedure derefimpl;override;
@@ -604,6 +660,9 @@ interface
           symofs,
           symofs,
           value   : int64;
           value   : int64;
           consttype : taiconst_type;
           consttype : taiconst_type;
+          { sleb128 and uleb128 values have a varying length; calling FixSize fixes their size so
+            that other offsets do not need to be changed. The fixed encoded length is stored in fixed_size }
+          fixed_size : byte;
           { we use for the 128bit int64/qword for now because I can't imagine a
           { we use for the 128bit int64/qword for now because I can't imagine a
             case where we need 128 bit now (FK) }
             case where we need 128 bit now (FK) }
           constructor Create(_typ:taiconst_type;_value : int64);
           constructor Create(_typ:taiconst_type;_value : int64);
@@ -625,6 +684,10 @@ interface
 {$ifdef i8086}
 {$ifdef i8086}
           constructor Create_sym_near(_sym:tasmsymbol);
           constructor Create_sym_near(_sym:tasmsymbol);
           constructor Create_sym_far(_sym:tasmsymbol);
           constructor Create_sym_far(_sym:tasmsymbol);
+          constructor Createname_near(const name:string;ofs:asizeint);
+          constructor Createname_far(const name:string;ofs:asizeint);
+          constructor Createname_near(const name:string;_symtyp:Tasmsymtype;ofs:asizeint);
+          constructor Createname_far(const name:string;_symtyp:Tasmsymtype;ofs:asizeint);
 {$endif i8086}
 {$endif i8086}
           constructor Create_type_sym(_typ:taiconst_type;_sym:tasmsymbol);
           constructor Create_type_sym(_typ:taiconst_type;_sym:tasmsymbol);
           constructor Create_sym_offset(_sym:tasmsymbol;ofs:asizeint);
           constructor Create_sym_offset(_sym:tasmsymbol;ofs:asizeint);
@@ -644,6 +707,9 @@ interface
           constructor Create_int_codeptr_unaligned(_value: int64);
           constructor Create_int_codeptr_unaligned(_value: int64);
           constructor Create_int_dataptr(_value: int64);
           constructor Create_int_dataptr(_value: int64);
           constructor Create_int_dataptr_unaligned(_value: int64);
           constructor Create_int_dataptr_unaligned(_value: int64);
+{$ifdef avr}
+          constructor Create_int_dataptr_unaligned(_value: int64; size: taiconst_type);
+{$endif}
 {$ifdef i8086}
 {$ifdef i8086}
           constructor Create_seg_name(const name:string);
           constructor Create_seg_name(const name:string);
           constructor Create_dgroup;
           constructor Create_dgroup;
@@ -654,6 +720,9 @@ interface
           procedure derefimpl;override;
           procedure derefimpl;override;
           function getcopy:tlinkedlistitem;override;
           function getcopy:tlinkedlistitem;override;
           function size:longint;
           function size:longint;
+          { sleb128 and uleb128 values have a varying length; calling FixSize fixes their size so
+            that other offsets do not need to be changed. The fixed encoded length is stored in fixed_size }
+          Procedure FixSize;
        end;
        end;
 
 
        { floating point const }
        { floating point const }
@@ -795,7 +864,7 @@ interface
            procedure derefimpl;override;
            procedure derefimpl;override;
            procedure SetCondition(const c:TAsmCond);
            procedure SetCondition(const c:TAsmCond);
            procedure allocate_oper(opers:longint);
            procedure allocate_oper(opers:longint);
-           procedure loadconst(opidx:longint;l:aint);
+           procedure loadconst(opidx:longint;l:tcgint);
            procedure loadsymbol(opidx:longint;s:tasmsymbol;sofs:longint);
            procedure loadsymbol(opidx:longint;s:tasmsymbol;sofs:longint);
            procedure loadlocal(opidx:longint;s:pointer;sofs:longint;indexreg:tregister;scale:byte;getoffset,forceref:boolean);
            procedure loadlocal(opidx:longint;s:pointer;sofs:longint;indexreg:tregister;scale:byte;getoffset,forceref:boolean);
            procedure loadref(opidx:longint;const r:treference);
            procedure loadref(opidx:longint;const r:treference);
@@ -822,11 +891,13 @@ interface
         { alignment for operator }
         { alignment for operator }
         tai_align_abstract = class(tai)
         tai_align_abstract = class(tai)
            aligntype : byte;   { 1 = no align, 2 = word align, 4 = dword align }
            aligntype : byte;   { 1 = no align, 2 = word align, 4 = dword align }
+           maxbytes  : byte;   { if needed bytes would be larger than maxbytes, alignment is ignored }
            fillsize  : byte;   { real size to fill }
            fillsize  : byte;   { real size to fill }
            fillop    : byte;   { value to fill with - optional }
            fillop    : byte;   { value to fill with - optional }
            use_op    : boolean;
            use_op    : boolean;
            constructor Create(b:byte);virtual;
            constructor Create(b:byte);virtual;
            constructor Create_op(b: byte; _op: byte);virtual;
            constructor Create_op(b: byte; _op: byte);virtual;
+           constructor create_max(b: byte; max: byte);virtual;
            constructor Create_zeros(b:byte);
            constructor Create_zeros(b:byte);
            constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
            constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
            procedure ppuwrite(ppufile:tcompilerppufile);override;
            procedure ppuwrite(ppufile:tcompilerppufile);override;
@@ -911,6 +982,18 @@ interface
           procedure ppuwrite(ppufile:tcompilerppufile);override;
           procedure ppuwrite(ppufile:tcompilerppufile);override;
         end;
         end;
 
 
+        teattrtyp = (eattrtype_none,eattrtype_dword,eattrtype_ntbs);
+        tai_eabi_attribute = class(tai)
+          eattr_typ : teattrtyp;
+          tag,value : dword;
+          valuestr : pstring;
+          constructor create(atag,avalue : dword);
+          constructor create(atag : dword;const avalue : string);
+          destructor destroy;override;
+          constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
+          procedure ppuwrite(ppufile:tcompilerppufile);override;
+        end;
+
     var
     var
       { array with all class types for tais }
       { array with all class types for tais }
       aiclass : taiclassarray;
       aiclass : taiclassarray;
@@ -924,7 +1007,7 @@ interface
       add_reg_instruction_hook : tadd_reg_instruction_proc;
       add_reg_instruction_hook : tadd_reg_instruction_proc;
 
 
     procedure maybe_new_object_file(list:TAsmList);
     procedure maybe_new_object_file(list:TAsmList);
-    procedure new_section(list:TAsmList;Asectype:TAsmSectiontype;const Aname:string;Aalign:byte;Asecorder:TasmSectionorder=secorder_default);
+    function new_section(list:TAsmList;Asectype:TAsmSectiontype;const Aname:string;Aalign:byte;Asecorder:TasmSectionorder=secorder_default) : tai_section;
 
 
     function ppuloadai(ppufile:tcompilerppufile):tai;
     function ppuloadai(ppufile:tcompilerppufile):tai;
     procedure ppuwriteai(ppufile:tcompilerppufile;n:tai);
     procedure ppuwriteai(ppufile:tcompilerppufile;n:tai);
@@ -933,14 +1016,17 @@ interface
 implementation
 implementation
 
 
     uses
     uses
+{$ifdef x86}
+      aasmcpu,
+{$endif x86}
       SysUtils,
       SysUtils,
       verbose,
       verbose,
-      globals;
+      globals,
+      ppu;
 
 
     const
     const
       pputaimarker = 254;
       pputaimarker = 254;
 
 
-
 {****************************************************************************
 {****************************************************************************
                                  Helpers
                                  Helpers
  ****************************************************************************}
  ****************************************************************************}
@@ -952,9 +1038,10 @@ implementation
       end;
       end;
 
 
 
 
-    procedure new_section(list:TAsmList;Asectype:TAsmSectiontype;const Aname:string;Aalign:byte;Asecorder:TasmSectionorder=secorder_default);
+    function new_section(list:TAsmList;Asectype:TAsmSectiontype;const Aname:string;Aalign:byte;Asecorder:TasmSectionorder=secorder_default) : tai_section;
       begin
       begin
-        list.concat(tai_section.create(Asectype,Aname,Aalign,Asecorder));
+        Result:=tai_section.create(Asectype,Aname,Aalign,Asecorder);
+        list.concat(Result);
         inc(list.section_count);
         inc(list.section_count);
         list.concat(cai_align.create(Aalign));
         list.concat(cai_align.create(Aalign));
       end;
       end;
@@ -1167,13 +1254,14 @@ implementation
                              TAI_SECTION
                              TAI_SECTION
  ****************************************************************************}
  ****************************************************************************}
 
 
-    constructor tai_section.Create(Asectype:TAsmSectiontype;const Aname:string;Aalign:byte;Asecorder:TasmSectionorder=secorder_default);
+    constructor tai_section.Create(Asectype:TAsmSectiontype;const Aname:string;Aalign:longint;Asecorder:TasmSectionorder=secorder_default);
       begin
       begin
         inherited Create;
         inherited Create;
         typ:=ait_section;
         typ:=ait_section;
         sectype:=asectype;
         sectype:=asectype;
         secalign:=Aalign;
         secalign:=Aalign;
         secorder:=Asecorder;
         secorder:=Asecorder;
+        TObjData.sectiontype2progbitsandflags(sectype,secprogbits,secflags);
         name:=stringdup(Aname);
         name:=stringdup(Aname);
         sec:=nil;
         sec:=nil;
       end;
       end;
@@ -1183,8 +1271,10 @@ implementation
       begin
       begin
         inherited ppuload(t,ppufile);
         inherited ppuload(t,ppufile);
         sectype:=TAsmSectiontype(ppufile.getbyte);
         sectype:=TAsmSectiontype(ppufile.getbyte);
-        secalign:=ppufile.getbyte;
+        secalign:=ppufile.getlongint;
         name:=ppufile.getpshortstring;
         name:=ppufile.getpshortstring;
+        ppufile.getset(tppuset1(secflags));
+        secprogbits:=TSectionProgbits(ppufile.getbyte);
         sec:=nil;
         sec:=nil;
       end;
       end;
 
 
@@ -1199,8 +1289,10 @@ implementation
       begin
       begin
         inherited ppuwrite(ppufile);
         inherited ppuwrite(ppufile);
         ppufile.putbyte(byte(sectype));
         ppufile.putbyte(byte(sectype));
-        ppufile.putbyte(secalign);
+        ppufile.putlongint(secalign);
         ppufile.putstring(name^);
         ppufile.putstring(name^);
+        ppufile.putset(tppuset1(secflags));
+        ppufile.putbyte(byte(secprogbits));
       end;
       end;
 
 
 
 
@@ -1208,12 +1300,12 @@ implementation
                              TAI_DATABLOCK
                              TAI_DATABLOCK
  ****************************************************************************}
  ****************************************************************************}
 
 
-    constructor tai_datablock.Create(const _name : string;_size : asizeint; def: tdef);
+    constructor tai_datablock.Create(const _name : string;_size : asizeint; def: tdef; _typ:Tasmsymtype);
 
 
       begin
       begin
          inherited Create;
          inherited Create;
          typ:=ait_datablock;
          typ:=ait_datablock;
-         sym:=current_asmdata.DefineAsmSymbol(_name,AB_LOCAL,AT_DATA,def);
+         sym:=current_asmdata.DefineAsmSymbol(_name,AB_LOCAL,_typ,def);
          { keep things aligned }
          { keep things aligned }
          if _size<=0 then
          if _size<=0 then
            _size:=sizeof(aint);
            _size:=sizeof(aint);
@@ -1221,12 +1313,29 @@ implementation
          is_global:=false;
          is_global:=false;
       end;
       end;
 
 
+    constructor tai_datablock.Create_hidden(const _name: string; _size: asizeint; def: tdef; _typ:Tasmsymtype);
+      begin
+        if tf_supports_hidden_symbols in target_info.flags then
+          begin
+            inherited Create;
+            typ:=ait_datablock;
+            sym:=current_asmdata.DefineAsmSymbol(_name,AB_PRIVATE_EXTERN,_typ,def);
+            { keep things aligned }
+            if _size<=0 then
+              _size:=sizeof(aint);
+            size:=_size;
+            is_global:=true;
+          end
+        else
+          Create(_name,_size,def,_typ);
+      end;
 
 
-    constructor tai_datablock.Create_global(const _name : string;_size : asizeint; def: tdef);
+
+    constructor tai_datablock.Create_global(const _name : string;_size : asizeint; def: tdef; _typ:Tasmsymtype);
       begin
       begin
          inherited Create;
          inherited Create;
          typ:=ait_datablock;
          typ:=ait_datablock;
-         sym:=current_asmdata.DefineAsmSymbol(_name,AB_GLOBAL,AT_DATA,def);
+         sym:=current_asmdata.DefineAsmSymbol(_name,AB_GLOBAL,_typ,def);
          { keep things aligned }
          { keep things aligned }
          if _size<=0 then
          if _size<=0 then
            _size:=sizeof(aint);
            _size:=sizeof(aint);
@@ -1240,7 +1349,7 @@ implementation
         inherited Create;
         inherited Create;
         sym:=ppufile.getasmsymbol;
         sym:=ppufile.getasmsymbol;
         size:=ppufile.getaint;
         size:=ppufile.getaint;
-        is_global:=boolean(ppufile.getbyte);
+        is_global:=ppufile.getboolean;
       end;
       end;
 
 
 
 
@@ -1249,7 +1358,7 @@ implementation
         inherited ppuwrite(ppufile);
         inherited ppuwrite(ppufile);
         ppufile.putasmsymbol(sym);
         ppufile.putasmsymbol(sym);
         ppufile.putaint(size);
         ppufile.putaint(size);
-        ppufile.putbyte(byte(is_global));
+        ppufile.putboolean(is_global);
       end;
       end;
 
 
 
 
@@ -1309,6 +1418,20 @@ implementation
          is_global:=true;
          is_global:=true;
       end;
       end;
 
 
+    constructor tai_symbol.Createname_hidden(const _name: string; _symtyp: Tasmsymtype; siz: longint; def: tdef);
+      begin
+        if tf_supports_hidden_symbols in target_info.flags then
+          begin
+            inherited Create;
+            typ:=ait_symbol;
+            sym:=current_asmdata.DefineAsmSymbol(_name,AB_PRIVATE_EXTERN,_symtyp,def);
+            size:=siz;
+            is_global:=true;
+          end
+        else
+          Createname(_name, _symtyp, siz, def);
+      end;
+
 
 
     constructor tai_symbol.createname_global_value(const _name: string;_symtyp: tasmsymtype; siz: longint; val: ptruint;def:tdef);
     constructor tai_symbol.createname_global_value(const _name: string;_symtyp: tasmsymtype; siz: longint; val: ptruint;def:tdef);
       begin
       begin
@@ -1323,7 +1446,7 @@ implementation
         inherited ppuload(t,ppufile);
         inherited ppuload(t,ppufile);
         sym:=ppufile.getasmsymbol;
         sym:=ppufile.getasmsymbol;
         size:=ppufile.getlongint;
         size:=ppufile.getlongint;
-        is_global:=boolean(ppufile.getbyte);
+        is_global:=ppufile.getboolean;
       end;
       end;
 
 
 
 
@@ -1332,7 +1455,7 @@ implementation
         inherited ppuwrite(ppufile);
         inherited ppuwrite(ppufile);
         ppufile.putasmsymbol(sym);
         ppufile.putasmsymbol(sym);
         ppufile.putlongint(size);
         ppufile.putlongint(size);
-        ppufile.putbyte(byte(is_global));
+        ppufile.putboolean(is_global);
       end;
       end;
 
 
 
 
@@ -1616,11 +1739,40 @@ implementation
          consttype:=aitconst_ptr;
          consttype:=aitconst_ptr;
       end;
       end;
 
 
+
     constructor tai_const.Create_sym_far(_sym: tasmsymbol);
     constructor tai_const.Create_sym_far(_sym: tasmsymbol);
       begin
       begin
         self.create_sym(_sym);
         self.create_sym(_sym);
         consttype:=aitconst_farptr;
         consttype:=aitconst_farptr;
       end;
       end;
+
+
+    constructor tai_const.Createname_near(const name:string;ofs:asizeint);
+      begin
+        self.Createname(name,ofs);
+        consttype:=aitconst_ptr;
+      end;
+
+
+    constructor tai_const.Createname_far(const name:string;ofs:asizeint);
+      begin
+        self.Createname(name,ofs);
+        consttype:=aitconst_farptr;
+      end;
+
+
+    constructor tai_const.Createname_near(const name:string;_symtyp:Tasmsymtype;ofs:asizeint);
+      begin
+        self.Createname(name,_symtyp,ofs);
+        consttype:=aitconst_ptr;
+      end;
+
+
+    constructor tai_const.Createname_far(const name:string;_symtyp:Tasmsymtype;ofs:asizeint);
+      begin
+        self.Createname(name,_symtyp,ofs);
+        consttype:=aitconst_farptr;
+      end;
 {$endif i8086}
 {$endif i8086}
 
 
 
 
@@ -1691,7 +1843,7 @@ implementation
       end;
       end;
 
 
 
 
-    constructor tai_const.Create_rel_sym_offset(_typ: taiconst_type; _sym,_endsym: tasmsymbol; _ofs: int64);
+    constructor tai_const.Create_rel_sym_offset(_typ: taiconst_type; _sym, _endsym: tasmsymbol; _ofs: int64);
        begin
        begin
          self.create_sym_offset(_sym,_ofs);
          self.create_sym_offset(_sym,_ofs);
          consttype:=_typ;
          consttype:=_typ;
@@ -1832,6 +1984,20 @@ implementation
       end;
       end;
 
 
 
 
+{$ifdef avr}
+    constructor tai_const.Create_int_dataptr_unaligned(_value: int64;
+      size: taiconst_type);
+      begin
+        inherited Create;
+        typ:=ait_const;
+        consttype:=size;
+        sym:=nil;
+        endsym:=nil;
+        symofs:=0;
+        value:=_value;
+      end;
+{$endif avr}
+
 {$ifdef i8086}
 {$ifdef i8086}
     constructor tai_const.Create_seg_name(const name:string);
     constructor tai_const.Create_seg_name(const name:string);
       begin
       begin
@@ -1899,10 +2065,10 @@ implementation
           aitconst_16bit,aitconst_16bit_unaligned :
           aitconst_16bit,aitconst_16bit_unaligned :
             result:=2;
             result:=2;
           aitconst_32bit,aitconst_darwin_dwarf_delta32,
           aitconst_32bit,aitconst_darwin_dwarf_delta32,
-	  aitconst_32bit_unaligned:
+          aitconst_32bit_unaligned:
             result:=4;
             result:=4;
           aitconst_64bit,aitconst_darwin_dwarf_delta64,
           aitconst_64bit,aitconst_darwin_dwarf_delta64,
-	  aitconst_64bit_unaligned:
+          aitconst_64bit_unaligned:
             result:=8;
             result:=8;
           aitconst_secrel32_symbol,
           aitconst_secrel32_symbol,
           aitconst_rva_symbol :
           aitconst_rva_symbol :
@@ -1911,9 +2077,31 @@ implementation
             else
             else
               result:=sizeof(pint);
               result:=sizeof(pint);
           aitconst_uleb128bit :
           aitconst_uleb128bit :
-            result:=LengthUleb128(qword(value));
+            begin
+              if fixed_size>0 then
+                result:=fixed_size
+              else if sym=nil then
+                begin
+                  FixSize;
+                  result:=fixed_size;
+                end
+              else
+                { worst case }
+                result:=sizeof(pint)+2;
+            end;
           aitconst_sleb128bit :
           aitconst_sleb128bit :
-            result:=LengthSleb128(value);
+            begin
+              if fixed_size>0 then
+                result:=fixed_size
+              else if sym=nil then
+                begin
+                  FixSize;
+                  result:=fixed_size;
+                end
+              else
+                { worst case }
+                result:=sizeof(pint)+2;
+            end;
           aitconst_half16bit,
           aitconst_half16bit,
           aitconst_gs:
           aitconst_gs:
             result:=2;
             result:=2;
@@ -1927,12 +2115,35 @@ implementation
             result:=sizeof(pint);
             result:=sizeof(pint);
           aitconst_gotoff_symbol:
           aitconst_gotoff_symbol:
             result:=4;
             result:=4;
+          aitconst_gottpoff:
+            result:=4;
+          aitconst_tlsgd:
+            result:=4;
+          aitconst_tpoff:
+            result:=4;
+          aitconst_tlsdesc:
+            result:=4;
+          aitconst_dtpoff:
+            result:=4;
           else
           else
             internalerror(200603253);
             internalerror(200603253);
         end;
         end;
       end;
       end;
 
 
 
 
+    procedure tai_const.FixSize;
+      begin
+        case consttype of
+          aitconst_uleb128bit:
+            fixed_size:=LengthUleb128(qword(value));
+          aitconst_sleb128bit:
+            fixed_size:=LengthSleb128(value);
+          else
+            Internalerror(2019030301);
+        end;
+      end;
+
+
 {****************************************************************************
 {****************************************************************************
                                TAI_realconst
                                TAI_realconst
  ****************************************************************************}
  ****************************************************************************}
@@ -2017,8 +2228,6 @@ implementation
             value.s128val:=ppufile.getreal;
             value.s128val:=ppufile.getreal;
           aitrealconst_s64comp:
           aitrealconst_s64comp:
             value.s64compval:=comp(ppufile.getint64);
             value.s64compval:=comp(ppufile.getint64);
-          else
-            internalerror(2014050602);
         end;
         end;
       end;
       end;
 
 
@@ -2046,8 +2255,6 @@ implementation
               c:=comp(value.s64compval);
               c:=comp(value.s64compval);
               ppufile.putint64(int64(c));
               ppufile.putint64(int64(c));
             end
             end
-          else
-            internalerror(2014050601);
         end;
         end;
       end;
       end;
 
 
@@ -2076,8 +2283,6 @@ implementation
             result:=10;
             result:=10;
           aitrealconst_s128bit:
           aitrealconst_s128bit:
             result:=16;
             result:=16;
-          else
-            internalerror(2014050603);
         end;
         end;
       end;
       end;
 
 
@@ -2420,7 +2625,7 @@ implementation
         inherited ppuload(t,ppufile);
         inherited ppuload(t,ppufile);
         temppos:=ppufile.getlongint;
         temppos:=ppufile.getlongint;
         tempsize:=ppufile.getlongint;
         tempsize:=ppufile.getlongint;
-        allocation:=boolean(ppufile.getbyte);
+        allocation:=ppufile.getboolean;
 {$ifdef EXTDEBUG}
 {$ifdef EXTDEBUG}
         problem:=nil;
         problem:=nil;
 {$endif EXTDEBUG}
 {$endif EXTDEBUG}
@@ -2432,7 +2637,7 @@ implementation
         inherited ppuwrite(ppufile);
         inherited ppuwrite(ppufile);
         ppufile.putlongint(temppos);
         ppufile.putlongint(temppos);
         ppufile.putlongint(tempsize);
         ppufile.putlongint(tempsize);
-        ppufile.putbyte(byte(allocation));
+        ppufile.putboolean(allocation);
       end;
       end;
 
 
 
 
@@ -2500,7 +2705,7 @@ implementation
         inherited ppuload(t,ppufile);
         inherited ppuload(t,ppufile);
         ppufile.getdata(reg,sizeof(Tregister));
         ppufile.getdata(reg,sizeof(Tregister));
         ratype:=tregalloctype(ppufile.getbyte);
         ratype:=tregalloctype(ppufile.getbyte);
-        keep:=boolean(ppufile.getbyte);
+        keep:=ppufile.getboolean;
       end;
       end;
 
 
 
 
@@ -2509,7 +2714,7 @@ implementation
         inherited ppuwrite(ppufile);
         inherited ppuwrite(ppufile);
         ppufile.putdata(reg,sizeof(Tregister));
         ppufile.putdata(reg,sizeof(Tregister));
         ppufile.putbyte(byte(ratype));
         ppufile.putbyte(byte(ratype));
-        ppufile.putbyte(byte(keep));
+        ppufile.putboolean(keep);
       end;
       end;
 
 
 
 
@@ -2553,7 +2758,7 @@ implementation
       end;
       end;
 
 
 
 
-    procedure tai_cpu_abstract.loadconst(opidx:longint;l:aint);
+    procedure tai_cpu_abstract.loadconst(opidx:longint;l:tcgint);
       begin
       begin
         allocate_oper(opidx+1);
         allocate_oper(opidx+1);
         with oper[opidx]^ do
         with oper[opidx]^ do
@@ -2596,6 +2801,9 @@ implementation
                localscale:=scale;
                localscale:=scale;
                localgetoffset:=getoffset;
                localgetoffset:=getoffset;
                localforceref:=forceref;
                localforceref:=forceref;
+{$ifdef x86}
+               localsegment:=NR_NO;
+{$endif x86}
              end;
              end;
            typ:=top_local;
            typ:=top_local;
          end;
          end;
@@ -2603,6 +2811,10 @@ implementation
 
 
 
 
     procedure tai_cpu_abstract.loadref(opidx:longint;const r:treference);
     procedure tai_cpu_abstract.loadref(opidx:longint;const r:treference);
+{$ifdef x86}
+      var
+        si_param: ShortInt;
+{$endif}
       begin
       begin
         allocate_oper(opidx+1);
         allocate_oper(opidx+1);
         with oper[opidx]^ do
         with oper[opidx]^ do
@@ -2617,7 +2829,17 @@ implementation
 {$ifdef x86}
 {$ifdef x86}
             { We allow this exception for x86, since overloading this would be
             { We allow this exception for x86, since overloading this would be
               too much of a speed penalty}
               too much of a speed penalty}
-            if (ref^.segment<>NR_NO) and (ref^.segment<>NR_DS) then
+            if is_x86_parameterized_string_op(opcode) then
+              begin
+                si_param:=get_x86_string_op_si_param(opcode);
+                if (si_param<>-1) and (taicpu(self).OperandOrder=op_att) then
+                  si_param:=x86_parameterized_string_op_param_count(opcode)-si_param-1;
+                if (si_param=opidx) and (ref^.segment<>NR_NO) and (ref^.segment<>NR_DS) then
+                  segprefix:=ref^.segment;
+              end
+            else if (opcode=A_XLAT) and (ref^.segment<>NR_NO) and (ref^.segment<>NR_DS) then
+              segprefix:=ref^.segment
+            else if (ref^.segment<>NR_NO) and (ref^.segment<>get_default_segment_of_ref(ref^)) then
               segprefix:=ref^.segment;
               segprefix:=ref^.segment;
 {$endif}
 {$endif}
 {$ifndef llvm}
 {$ifndef llvm}
@@ -2673,6 +2895,10 @@ implementation
 
 
 
 
     procedure tai_cpu_abstract.loadoper(opidx:longint;o:toper);
     procedure tai_cpu_abstract.loadoper(opidx:longint;o:toper);
+{$ifdef x86}
+      var
+        si_param: ShortInt;
+{$endif x86}
       begin
       begin
         allocate_oper(opidx+1);
         allocate_oper(opidx+1);
         clearop(opidx);
         clearop(opidx);
@@ -2691,7 +2917,19 @@ implementation
                   new(ref);
                   new(ref);
                   ref^:=o.ref^;
                   ref^:=o.ref^;
 {$ifdef x86}
 {$ifdef x86}
-                  if (ref^.segment<>NR_NO) and (ref^.segment<>NR_DS) then
+                  { We allow this exception for x86, since overloading this would be
+                    too much of a speed penalty}
+                  if is_x86_parameterized_string_op(opcode) then
+                    begin
+                      si_param:=get_x86_string_op_si_param(opcode);
+                      if (si_param<>-1) and (taicpu(self).OperandOrder=op_att) then
+                        si_param:=x86_parameterized_string_op_param_count(opcode)-si_param-1;
+                      if (si_param=opidx) and (ref^.segment<>NR_NO) and (ref^.segment<>NR_DS) then
+                        segprefix:=ref^.segment;
+                    end
+                  else if (opcode=A_XLAT) and (ref^.segment<>NR_NO) and (ref^.segment<>NR_DS) then
+                    segprefix:=ref^.segment
+                  else if (ref^.segment<>NR_NO) and (ref^.segment<>get_default_segment_of_ref(ref^)) then
                     segprefix:=ref^.segment;
                     segprefix:=ref^.segment;
 {$endif x86}
 {$endif x86}
                   if assigned(add_reg_instruction_hook) then
                   if assigned(add_reg_instruction_hook) then
@@ -2709,6 +2947,8 @@ implementation
                     add_reg_instruction_hook(self,shifterop^.rs);
                     add_reg_instruction_hook(self,shifterop^.rs);
                 end;
                 end;
 {$endif ARM}
 {$endif ARM}
+              else
+                ;
              end;
              end;
           end;
           end;
       end;
       end;
@@ -2734,6 +2974,8 @@ implementation
               top_wstring:
               top_wstring:
                 donewidestring(pwstrval);
                 donewidestring(pwstrval);
 {$endif jvm}
 {$endif jvm}
+              else
+                ;
             end;
             end;
             typ:=top_none;
             typ:=top_none;
           end;
           end;
@@ -2787,6 +3029,8 @@ implementation
                   p.oper[i]^.shifterop^:=oper[i]^.shifterop^;
                   p.oper[i]^.shifterop^:=oper[i]^.shifterop^;
                 end;
                 end;
 {$endif ARM}
 {$endif ARM}
+              else
+                ;
             end;
             end;
           end;
           end;
         getcopy:=p;
         getcopy:=p;
@@ -2821,16 +3065,17 @@ implementation
         i : integer;
         i : integer;
       begin
       begin
         inherited ppuload(t,ppufile);
         inherited ppuload(t,ppufile);
-        { hopefully, we don't get problems with big/litte endian here when cross compiling :/ }
+        { hopefully, we don't get problems with big/little endian here when cross compiling :/ }
         ppufile.getdata(condition,sizeof(tasmcond));
         ppufile.getdata(condition,sizeof(tasmcond));
-        allocate_oper(ppufile.getbyte);
+        ops := ppufile.getbyte;
+        allocate_oper(ops);
         for i:=0 to ops-1 do
         for i:=0 to ops-1 do
           ppuloadoper(ppufile,oper[i]^);
           ppuloadoper(ppufile,oper[i]^);
         opcode:=tasmop(ppufile.getword);
         opcode:=tasmop(ppufile.getword);
 {$ifdef x86}
 {$ifdef x86}
         ppufile.getdata(segprefix,sizeof(Tregister));
         ppufile.getdata(segprefix,sizeof(Tregister));
 {$endif x86}
 {$endif x86}
-        is_jmp:=boolean(ppufile.getbyte);
+        is_jmp:=ppufile.getboolean;
       end;
       end;
 
 
 
 
@@ -2847,7 +3092,7 @@ implementation
 {$ifdef x86}
 {$ifdef x86}
         ppufile.putdata(segprefix,sizeof(Tregister));
         ppufile.putdata(segprefix,sizeof(Tregister));
 {$endif x86}
 {$endif x86}
-        ppufile.putbyte(byte(is_jmp));
+        ppufile.putboolean(is_jmp);
       end;
       end;
 
 
 
 
@@ -2920,6 +3165,9 @@ implementation
                 begin
                 begin
                   ppufile.getderef(localsymderef);
                   ppufile.getderef(localsymderef);
                   localsymofs:=ppufile.getaint;
                   localsymofs:=ppufile.getaint;
+{$ifdef x86}
+                  localsegment:=tregister(ppufile.getlongint);
+{$endif x86}
                   localindexreg:=tregister(ppufile.getlongint);
                   localindexreg:=tregister(ppufile.getlongint);
                   localscale:=ppufile.getbyte;
                   localscale:=ppufile.getbyte;
                   localgetoffset:=(ppufile.getbyte<>0);
                   localgetoffset:=(ppufile.getbyte<>0);
@@ -2959,6 +3207,9 @@ implementation
                 begin
                 begin
                   ppufile.putderef(localsymderef);
                   ppufile.putderef(localsymderef);
                   ppufile.putaint(localsymofs);
                   ppufile.putaint(localsymofs);
+{$ifdef x86}
+                  ppufile.putlongint(longint(localsegment));
+{$endif x86}
                   ppufile.putlongint(longint(localindexreg));
                   ppufile.putlongint(longint(localindexreg));
                   ppufile.putbyte(localscale);
                   ppufile.putbyte(localscale);
                   ppufile.putbyte(byte(localgetoffset));
                   ppufile.putbyte(byte(localgetoffset));
@@ -2988,6 +3239,7 @@ implementation
           fillsize:=0;
           fillsize:=0;
           fillop:=0;
           fillop:=0;
           use_op:=false;
           use_op:=false;
+          maxbytes:=aligntype;
        end;
        end;
 
 
 
 
@@ -3002,6 +3254,22 @@ implementation
           fillsize:=0;
           fillsize:=0;
           fillop:=_op;
           fillop:=_op;
           use_op:=true;
           use_op:=true;
+          maxbytes:=aligntype;
+       end;
+
+
+     constructor tai_align_abstract.create_max(b : byte; max : byte);  { alignment item with an explicit maxbytes limit }
+       begin
+          inherited Create;
+          typ:=ait_align;
+          if b in [1,2,4,8,16,32] then  { only these alignment values are accepted as-is }
+            aligntype := b
+          else
+            aligntype := 1;  { any other request falls back to an alignment of 1 }
+          maxbytes:=max;  { stored as given; the other constructors visible here set maxbytes:=aligntype instead }
+          fillsize:=0;
+          fillop:=0;
+          use_op:=false;  { this constructor never requests a fill opcode }
        end;
        end;
 
 
 
 
@@ -3016,6 +3284,7 @@ implementation
          use_op:=true;
          use_op:=true;
          fillsize:=0;
          fillsize:=0;
          fillop:=0;
          fillop:=0;
+         maxbytes:=aligntype;
        end;
        end;
 
 
 
 
@@ -3034,7 +3303,8 @@ implementation
         aligntype:=ppufile.getbyte;
         aligntype:=ppufile.getbyte;
         fillsize:=0;
         fillsize:=0;
         fillop:=ppufile.getbyte;
         fillop:=ppufile.getbyte;
-        use_op:=boolean(ppufile.getbyte);
+        use_op:=ppufile.getboolean;
+        maxbytes:=ppufile.getbyte;
       end;
       end;
 
 
 
 
@@ -3043,7 +3313,8 @@ implementation
         inherited ppuwrite(ppufile);
         inherited ppuwrite(ppufile);
         ppufile.putbyte(aligntype);
         ppufile.putbyte(aligntype);
         ppufile.putbyte(fillop);
         ppufile.putbyte(fillop);
-        ppufile.putbyte(byte(use_op));
+        ppufile.putboolean(use_op);
+        ppufile.putbyte(maxbytes);
       end;
       end;
 
 
 
 
@@ -3064,7 +3335,9 @@ implementation
         sd_reg,        { pushreg }
         sd_reg,        { pushreg }
         sd_regoffset,  { savereg }
         sd_regoffset,  { savereg }
         sd_regoffset,  { savexmm }
         sd_regoffset,  { savexmm }
-        sd_none        { pushframe }
+        sd_none,       { pushframe }
+        sd_reg,        { pushnv }
+        sd_none        { savenv }
       );
       );
 
 
     constructor tai_seh_directive.create(_kind:TAsmSehDirective);
     constructor tai_seh_directive.create(_kind:TAsmSehDirective);
@@ -3119,8 +3392,6 @@ implementation
               ppufile.getdata(data.reg,sizeof(TRegister));
               ppufile.getdata(data.reg,sizeof(TRegister));
               data.offset:=ppufile.getdword;
               data.offset:=ppufile.getdword;
             end;
             end;
-        else
-          InternalError(2011091201);
         end;
         end;
       end;
       end;
 
 
@@ -3148,8 +3419,6 @@ implementation
               ppufile.putdata(data.reg,sizeof(TRegister));
               ppufile.putdata(data.reg,sizeof(TRegister));
               ppufile.putdword(data.offset);
               ppufile.putdword(data.offset);
             end;
             end;
-        else
-          InternalError(2011091202);
         end;
         end;
       end;
       end;
 
 
@@ -3157,6 +3426,50 @@ implementation
       begin
       begin
       end;
       end;
 
 
+
+{****************************************************************************
+                              tai_eabi_attribute
+ ****************************************************************************}
+
+    constructor tai_eabi_attribute.create(atag,avalue : dword);  { EABI attribute whose payload is a numeric (dword) value }
+      begin
+        inherited Create;
+        typ:=ait_eabi_attribute;
+        eattr_typ:=eattrtype_dword;  { records that value, not valuestr, is the payload set by this overload }
+        tag:=atag;
+        value:=avalue;
+      end;
+
+
+    constructor tai_eabi_attribute.create(atag: dword; const avalue: string);  { EABI attribute whose payload is a string value }
+      begin
+        inherited Create;
+        typ:=ait_eabi_attribute;
+        eattr_typ:=eattrtype_ntbs;  { presumably "null-terminated byte string" - TODO confirm against the type declaration }
+        tag:=atag;
+        valuestr:=NewStr(avalue);  { NOTE(review): tai_eabi_attribute.destroy does not release this pointer - confirm who owns it }
+      end;
+
+
+    destructor tai_eabi_attribute.destroy;  { only chains to the inherited destructor }
+      begin
+        Inherited Destroy;  { NOTE(review): valuestr obtained via NewStr in the string constructor is not disposed here - possible leak, confirm }
+      end;
+
+
+    constructor tai_eabi_attribute.ppuload(t:taitype;ppufile:tcompilerppufile);
+      { Restores an EABI attribute from a PPU stream.
+        t       : tai type tag, forwarded unchanged to the inherited loader
+        ppufile : stream positioned at the data produced by ppuwrite
+        Reads back exactly what ppuwrite stores, in the same order: the
+        inherited tai data first, then tag and value as dwords. Previously the
+        body was empty, which left the instance uninitialised and the stream
+        position out of step with what ppuwrite had emitted. }
+      begin
+        inherited ppuload(t,ppufile);
+        { ppuwrite persists only the dword payload, so that is the only form
+          that can be reconstructed here.
+          NOTE(review): string-valued (eattrtype_ntbs) attributes are not
+          serialized by ppuwrite - confirm whether they can ever reach a PPU. }
+        eattr_typ:=eattrtype_dword;
+        tag:=ppufile.getdword;
+        value:=ppufile.getdword;
+      end;
+
+
+    procedure tai_eabi_attribute.ppuwrite(ppufile:tcompilerppufile);  { writes the inherited data, then tag and value as dwords }
+      begin
+        inherited ppuwrite(ppufile);
+        ppufile.putdword(tag);
+        ppufile.putdword(value);  { NOTE(review): eattr_typ and valuestr are not written - confirm string attributes never need to round-trip }
+      end;
+
+
 {$ifdef JVM}
 {$ifdef JVM}
 
 
 {****************************************************************************
 {****************************************************************************

+ 322 - 89
compiler/aggas.pas

@@ -1,4 +1,4 @@
-{
+  {
     Copyright (c) 1998-2006 by the Free Pascal team
     Copyright (c) 1998-2006 by the Free Pascal team
 
 
     This unit implements the generic part of the GNU assembler
     This unit implements the generic part of the GNU assembler
@@ -32,7 +32,7 @@ interface
 
 
     uses
     uses
       globtype,globals,
       globtype,globals,
-      aasmbase,aasmtai,aasmdata,
+      aasmbase,aasmtai,aasmdata,aasmcfi,
       assemble;
       assemble;
 
 
     type
     type
@@ -48,12 +48,14 @@ interface
         function sectionname(atype:TAsmSectiontype;const aname:string;aorder:TAsmSectionOrder):string;virtual;
         function sectionname(atype:TAsmSectiontype;const aname:string;aorder:TAsmSectionOrder):string;virtual;
         function sectionattrs(atype:TAsmSectiontype):string;virtual;
         function sectionattrs(atype:TAsmSectiontype):string;virtual;
         function sectionattrs_coff(atype:TAsmSectiontype):string;virtual;
         function sectionattrs_coff(atype:TAsmSectiontype):string;virtual;
-        function sectionalignment_aix(atype:TAsmSectiontype;secalign: byte):string;
-        procedure WriteSection(atype:TAsmSectiontype;const aname:string;aorder:TAsmSectionOrder;secalign:byte);
+        function sectionalignment_aix(atype:TAsmSectiontype;secalign: longint):string;
+        procedure WriteSection(atype:TAsmSectiontype;const aname:string;aorder:TAsmSectionOrder;secalign:longint;
+          secflags:TSectionFlags=[];secprogbits:TSectionProgbits=SPB_None);virtual;
         procedure WriteExtraHeader;virtual;
         procedure WriteExtraHeader;virtual;
         procedure WriteExtraFooter;virtual;
         procedure WriteExtraFooter;virtual;
         procedure WriteInstruction(hp: tai);
         procedure WriteInstruction(hp: tai);
         procedure WriteWeakSymbolRef(s: tasmsymbol); virtual;
         procedure WriteWeakSymbolRef(s: tasmsymbol); virtual;
+        procedure WriteHiddenSymbol(sym: TAsmSymbol);
         procedure WriteAixStringConst(hp: tai_string);
         procedure WriteAixStringConst(hp: tai_string);
         procedure WriteAixIntConst(hp: tai_const);
         procedure WriteAixIntConst(hp: tai_const);
         procedure WriteUnalignedIntConst(hp: tai_const);
         procedure WriteUnalignedIntConst(hp: tai_const);
@@ -67,6 +69,7 @@ interface
         setcount: longint;
         setcount: longint;
         procedure WriteDecodedSleb128(a: int64);
         procedure WriteDecodedSleb128(a: int64);
         procedure WriteDecodedUleb128(a: qword);
         procedure WriteDecodedUleb128(a: qword);
+        procedure WriteCFI(hp: tai_cfi_base);
         function NextSetLabel: string;
         function NextSetLabel: string;
        protected
        protected
         InstrWriter: TCPUInstrWriter;
         InstrWriter: TCPUInstrWriter;
@@ -211,11 +214,11 @@ implementation
 { vtable for a class called Window:                                       }
 { vtable for a class called Window:                                       }
 { .section .data.rel.ro._ZTV6Window,"awG",@progbits,_ZTV6Window,comdat    }
 { .section .data.rel.ro._ZTV6Window,"awG",@progbits,_ZTV6Window,comdat    }
 { TODO: .data.ro not yet working}
 { TODO: .data.ro not yet working}
-{$if defined(arm) or defined(powerpc)}
+{$if defined(arm) or defined(riscv64) or defined(powerpc)}
           '.rodata',
           '.rodata',
-{$else arm}
+{$else defined(arm) or defined(riscv64) or defined(powerpc)}
           '.data',
           '.data',
-{$endif arm}
+{$endif defined(arm) or defined(riscv64) or defined(powerpc)}
           '.rodata',
           '.rodata',
           '.bss',
           '.bss',
           '.threadvar',
           '.threadvar',
@@ -269,7 +272,9 @@ implementation
           '.obcj_nlcatlist',
           '.obcj_nlcatlist',
           '.objc_protolist',
           '.objc_protolist',
           '.stack',
           '.stack',
-          '.heap'
+          '.heap',
+          '.gcc_except_table',
+          '.ARM.attributes'
         );
         );
         secnames_pic : array[TAsmSectiontype] of string[length('__DATA, __datacoal_nt,coalesced')] = ('','',
         secnames_pic : array[TAsmSectiontype] of string[length('__DATA, __datacoal_nt,coalesced')] = ('','',
           '.text',
           '.text',
@@ -328,7 +333,9 @@ implementation
           '.obcj_nlcatlist',
           '.obcj_nlcatlist',
           '.objc_protolist',
           '.objc_protolist',
           '.stack',
           '.stack',
-          '.heap'
+          '.heap',
+          '.gcc_except_table',
+          '..ARM.attributes'
         );
         );
       var
       var
         sep     : string[3];
         sep     : string[3];
@@ -346,15 +353,19 @@ implementation
             exit;
             exit;
           end;
           end;
 
 
-        if (atype=sec_threadvar) and
-          (target_info.system in (systems_windows+systems_wince)) then
-          secname:='.tls';
+        if atype=sec_threadvar then
+          begin
+            if (target_info.system in (systems_windows+systems_wince)) then
+              secname:='.tls'
+            else if (target_info.system in systems_linux) then
+              secname:='.tbss';
+          end;
 
 
         { go32v2 stub only loads .text and .data sections, and allocates space for .bss.
         { go32v2 stub only loads .text and .data sections, and allocates space for .bss.
           Thus, data which normally goes into .rodata and .rodata_norel sections must
           Thus, data which normally goes into .rodata and .rodata_norel sections must
           end up in .data section }
           end up in .data section }
         if (atype in [sec_rodata,sec_rodata_norel]) and
         if (atype in [sec_rodata,sec_rodata_norel]) and
-          (target_info.system=system_i386_go32v2) then
+          (target_info.system in [system_i386_go32v2,system_m68k_palmos]) then
           secname:='.data';
           secname:='.data';
 
 
         { Windows correctly handles reallocations in readonly sections }
         { Windows correctly handles reallocations in readonly sections }
@@ -362,9 +373,18 @@ implementation
           (target_info.system in systems_all_windows+systems_nativent-[system_i8086_win16]) then
           (target_info.system in systems_all_windows+systems_nativent-[system_i8086_win16]) then
           secname:='.rodata';
           secname:='.rodata';
 
 
-        { Use .rodata for Android }
-        if (target_info.system in systems_android) and (atype in [sec_rodata,sec_rodata_norel]) then
-          secname:='.rodata';
+        { Use .rodata and .data.rel.ro for Android with PIC }
+        if (target_info.system in systems_android) and (cs_create_pic in current_settings.moduleswitches) then
+          begin
+            case atype of
+              sec_rodata:
+                secname:='.data.rel.ro';
+              sec_rodata_norel:
+                secname:='.rodata';
+              else
+                ;
+            end;
+          end;
 
 
         { section type user gives the user full control over the section name }
         { section type user gives the user full control over the section name }
         if atype=sec_user then
         if atype=sec_user then
@@ -434,7 +454,7 @@ implementation
       end;
       end;
 
 
 
 
-    function TGNUAssembler.sectionalignment_aix(atype:TAsmSectiontype;secalign: byte): string;
+    function TGNUAssembler.sectionalignment_aix(atype:TAsmSectiontype;secalign: longint): string;
       var
       var
         l: longint;
         l: longint;
       begin
       begin
@@ -450,11 +470,16 @@ implementation
       end;
       end;
 
 
 
 
-    procedure TGNUAssembler.WriteSection(atype:TAsmSectiontype;const aname:string;aorder:TAsmSectionOrder;secalign:byte);
+    procedure TGNUAssembler.WriteSection(atype:TAsmSectiontype;const aname:string;aorder:TAsmSectionOrder;secalign:longint;secflags:TSectionFlags=[];secprogbits:TSectionProgbits=SPB_None);
       var
       var
         s : string;
         s : string;
+        secflag: TSectionFlag;
+        sectionprogbits,
+        sectionflags: boolean;
       begin
       begin
         writer.AsmLn;
         writer.AsmLn;
+        sectionflags:=false;
+        sectionprogbits:=false;
         case target_info.system of
         case target_info.system of
          system_i386_OS2,
          system_i386_OS2,
          system_i386_EMX: ;
          system_i386_EMX: ;
@@ -463,7 +488,21 @@ implementation
            begin
            begin
              { ... but vasm is GAS compatible on amiga/atari, and supports named sections }
              { ... but vasm is GAS compatible on amiga/atari, and supports named sections }
              if create_smartlink_sections then
              if create_smartlink_sections then
-               writer.AsmWrite('.section ');
+               begin
+                 writer.AsmWrite('.section ');
+                 sectionflags:=true;
+                 sectionprogbits:=true;
+               end;
+           end;
+         system_i386_win32,
+         system_x86_64_win64,
+         system_i386_wince,
+         system_arm_wince:
+           begin
+             { according to the GNU AS guide, AS for COFF does not support
+               progbits }
+             writer.AsmWrite('.section ');
+             sectionflags:=true;
            end;
            end;
          system_powerpc_darwin,
          system_powerpc_darwin,
          system_i386_darwin,
          system_i386_darwin,
@@ -480,60 +519,107 @@ implementation
                writer.AsmWrite('.section ');
                writer.AsmWrite('.section ');
            end
            end
          else
          else
-          writer.AsmWrite('.section ');
+           begin
+             writer.AsmWrite('.section ');
+             { sectionname may rename those sections, so we do not write flags/progbits for them,
+               the assembler will ignore them/spit out a warning anyway }
+             if not(atype in [sec_data,sec_rodata,sec_rodata_norel]) then
+               begin
+                 sectionflags:=true;
+                 sectionprogbits:=true;
+               end;
+           end
         end;
         end;
         s:=sectionname(atype,aname,aorder);
         s:=sectionname(atype,aname,aorder);
         writer.AsmWrite(s);
         writer.AsmWrite(s);
-        case atype of
-          sec_fpc :
-            if aname = 'resptrs' then
-              writer.AsmWrite(', "a", @progbits');
-          sec_stub :
-            begin
-              case target_info.system of
-                { there are processor-independent shortcuts available    }
-                { for this, namely .symbol_stub and .picsymbol_stub, but }
-                { they don't work and gcc doesn't use them either...     }
-                system_powerpc_darwin,
-                system_powerpc64_darwin:
-                  if (cs_create_pic in current_settings.moduleswitches) then
-                    writer.AsmWriteln('__TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32')
-                  else
-                    writer.AsmWriteln('__TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16');
-                system_i386_darwin,
-                system_i386_iphonesim:
-                  writer.AsmWriteln('__IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5');
-                system_arm_darwin:
-                  if (cs_create_pic in current_settings.moduleswitches) then
-                    writer.AsmWriteln('__TEXT,__picsymbolstub4,symbol_stubs,none,16')
+        { flags explicitly defined? }
+        if (sectionflags or sectionprogbits) and
+           ((secflags<>[]) or
+            (secprogbits<>SPB_None)) then
+          begin
+            if sectionflags then
+              begin
+                s:=',"';
+                for secflag in secflags do
+                  case secflag of
+                    SF_A:
+                      s:=s+'a';
+                    SF_W:
+                      s:=s+'w';
+                    SF_X:
+                      s:=s+'x';
+                  end;
+                writer.AsmWrite(s+'"');
+              end;
+            if sectionprogbits then
+              begin
+                case secprogbits of
+                  SPB_PROGBITS:
+                    writer.AsmWrite(',%progbits');
+                  SPB_NOBITS:
+                    writer.AsmWrite(',%nobits');
+                  SPB_NOTE:
+                    writer.AsmWrite(',%note');
+                  SPB_None:
+                    ;
                   else
                   else
-                    writer.AsmWriteln('__TEXT,__symbol_stub4,symbol_stubs,none,12')
-                { darwin/(x86-64/AArch64) uses PC-based GOT addressing, no
-                  explicit symbol stubs }
-                else
-                  internalerror(2006031101);
+                    InternalError(2019100801);
+                end;
               end;
               end;
-            end;
+          end
         else
         else
-          { GNU AS won't recognize '.text.n_something' section name as belonging
-            to '.text' and assigns default attributes to it, which is not
-            always correct. We have to fix it.
+          case atype of
+            sec_fpc :
+              if aname = 'resptrs' then
+                writer.AsmWrite(', "a", @progbits');
+            sec_stub :
+              begin
+                case target_info.system of
+                  { there are processor-independent shortcuts available    }
+                  { for this, namely .symbol_stub and .picsymbol_stub, but }
+                  { they don't work and gcc doesn't use them either...     }
+                  system_powerpc_darwin,
+                  system_powerpc64_darwin:
+                    if (cs_create_pic in current_settings.moduleswitches) then
+                      writer.AsmWriteln('__TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32')
+                    else
+                      writer.AsmWriteln('__TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16');
+                  system_i386_darwin,
+                  system_i386_iphonesim:
+                    writer.AsmWriteln('__IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5');
+                  system_arm_darwin:
+                    if (cs_create_pic in current_settings.moduleswitches) then
+                      writer.AsmWriteln('__TEXT,__picsymbolstub4,symbol_stubs,none,16')
+                    else
+                      writer.AsmWriteln('__TEXT,__symbol_stub4,symbol_stubs,none,12')
+                  { darwin/(x86-64/AArch64) uses PC-based GOT addressing, no
+                    explicit symbol stubs }
+                  else
+                    internalerror(2006031101);
+                end;
+              end;
+          else
+            { GNU AS won't recognize '.text.n_something' section name as belonging
+              to '.text' and assigns default attributes to it, which is not
+              always correct. We have to fix it.
 
 
-            TODO: This likely applies to all systems which smartlink without
-            creating libraries }
-          if is_smart_section(atype) and (aname<>'') then
+              TODO: This likely applies to all systems which smartlink without
+              creating libraries }
             begin
             begin
-              s:=sectionattrs(atype);
-              if (s<>'') then
-                writer.AsmWrite(',"'+s+'"');
-            end
-         else if target_info.system in systems_aix then
-           begin
-             s:=sectionalignment_aix(atype,secalign);
-             if s<>'' then
-               writer.AsmWrite(','+s);
-           end;
-        end;
+              if is_smart_section(atype) and (aname<>'') then
+                begin
+                  s:=sectionattrs(atype);
+                  if (s<>'') then
+                    writer.AsmWrite(',"'+s+'"');
+                end;
+              if target_info.system in systems_aix then
+                begin
+                  s:=sectionalignment_aix(atype,secalign);
+                  if s<>'' then
+                    writer.AsmWrite(','+s);
+                end;
+            end;
+          end;
         writer.AsmLn;
         writer.AsmLn;
         LastSecType:=atype;
         LastSecType:=atype;
       end;
       end;
@@ -544,7 +630,7 @@ implementation
         i,len : longint;
         i,len : longint;
         buf   : array[0..63] of byte;
         buf   : array[0..63] of byte;
       begin
       begin
-        len:=EncodeUleb128(a,buf);
+        len:=EncodeUleb128(a,buf,0);
         for i:=0 to len-1 do
         for i:=0 to len-1 do
           begin
           begin
             if (i > 0) then
             if (i > 0) then
@@ -554,12 +640,45 @@ implementation
       end;
       end;
 
 
 
 
+    procedure TGNUAssembler.WriteCFI(hp: tai_cfi_base);  { writes one CFI entry as a line of assembler text }
+      begin
+        writer.AsmWrite(cfi2str[hp.cfityp]);  { directive text is looked up by CFI type }
+        case hp.cfityp of
+          cfi_startproc,
+          cfi_endproc:
+            ;  { nothing is written after the directive for these two types }
+          cfi_undefined,
+          cfi_restore,
+          cfi_def_cfa_register:
+            begin
+              writer.AsmWrite(' ');
+              writer.AsmWrite(gas_regname(tai_cfi_op_reg(hp).reg1));  { single register operand; hp is cast to tai_cfi_op_reg }
+            end;
+          cfi_def_cfa_offset:
+            begin
+              writer.AsmWrite(' ');
+              writer.AsmWrite(tostr(tai_cfi_op_val(hp).val1));  { single numeric operand; hp is cast to tai_cfi_op_val }
+            end;
+          cfi_offset:
+            begin
+              writer.AsmWrite(' ');
+              writer.AsmWrite(gas_regname(tai_cfi_op_reg_val(hp).reg1));  { register first ... }
+              writer.AsmWrite(',');
+              writer.AsmWrite(tostr(tai_cfi_op_reg_val(hp).val));  { ... then the numeric value, comma separated }
+            end;
+          else
+            internalerror(2019030203);  { any CFI type not listed above is treated as a compiler bug }
+        end;
+        writer.AsmLn;  { terminate the line, including for the operand-less types }
+      end;
+
+
     procedure TGNUAssembler.WriteDecodedSleb128(a: int64);
     procedure TGNUAssembler.WriteDecodedSleb128(a: int64);
       var
       var
         i,len : longint;
         i,len : longint;
         buf   : array[0..255] of byte;
         buf   : array[0..255] of byte;
       begin
       begin
-        len:=EncodeSleb128(a,buf);
+        len:=EncodeSleb128(a,buf,0);
         for i:=0 to len-1 do
         for i:=0 to len-1 do
           begin
           begin
             if (i > 0) then
             if (i > 0) then
@@ -583,9 +702,10 @@ implementation
         end;
         end;
 
 
 
 
-      procedure doalign(alignment: byte; use_op: boolean; fillop: byte; out last_align: longint;lasthp:tai);
+      procedure doalign(alignment: byte; use_op: boolean; fillop: byte; maxbytes: byte; out last_align: longint;lasthp:tai);
         var
         var
           i: longint;
           i: longint;
+          alignment64 : int64;
 {$ifdef m68k}
 {$ifdef m68k}
           instr : string;
           instr : string;
 {$endif}
 {$endif}
@@ -612,14 +732,33 @@ implementation
                   else
                   else
                     begin
                     begin
 {$endif m68k}
 {$endif m68k}
-                  writer.AsmWrite(#9'.balign '+tostr(alignment));
-                  if use_op then
-                    writer.AsmWrite(','+tostr(fillop))
+                      alignment64:=alignment;
+                      if (maxbytes<>alignment) and ispowerof2(alignment64,i) then
+                        begin
+                          if use_op then
+                            begin
+                              writer.AsmWrite(#9'.p2align '+tostr(i)+','+tostr(fillop)+','+tostr(maxbytes));
+                              writer.AsmLn;
+                              writer.AsmWrite(#9'.p2align '+tostr(i-1)+','+tostr(fillop));
+                            end
+                          else
+                            begin
+                              writer.AsmWrite(#9'.p2align '+tostr(i)+',,'+tostr(maxbytes));
+                              writer.AsmLn;
+                              writer.AsmWrite(#9'.p2align '+tostr(i-1));
+                            end
+                        end
+                      else
+                        begin
+                          writer.AsmWrite(#9'.balign '+tostr(alignment));
+                          if use_op then
+                            writer.AsmWrite(','+tostr(fillop))
 {$ifdef x86}
 {$ifdef x86}
-                  { force NOP as alignment op code }
-                  else if (LastSecType=sec_code) and (asminfo^.id<>as_solaris_as) then
-                    writer.AsmWrite(',0x90');
+                          { force NOP as alignment op code }
+                          else if (LastSecType=sec_code) and (asminfo^.id<>as_solaris_as) then
+                            writer.AsmWrite(',0x90');
 {$endif x86}
 {$endif x86}
+                        end;
 {$ifdef m68k}
 {$ifdef m68k}
                     end;
                     end;
 {$endif m68k}
 {$endif m68k}
@@ -709,16 +848,18 @@ implementation
 
 
            ait_align :
            ait_align :
              begin
              begin
-               doalign(tai_align_abstract(hp).aligntype,tai_align_abstract(hp).use_op,tai_align_abstract(hp).fillop,last_align,lasthp);
+               doalign(tai_align_abstract(hp).aligntype,tai_align_abstract(hp).use_op,tai_align_abstract(hp).fillop,tai_align_abstract(hp).maxbytes,last_align,lasthp);
              end;
              end;
 
 
            ait_section :
            ait_section :
              begin
              begin
                if tai_section(hp).sectype<>sec_none then
                if tai_section(hp).sectype<>sec_none then
                  if replaceforbidden then
                  if replaceforbidden then
-                   WriteSection(tai_section(hp).sectype,ReplaceForbiddenAsmSymbolChars(tai_section(hp).name^),tai_section(hp).secorder,tai_section(hp).secalign)
+                   WriteSection(tai_section(hp).sectype,ReplaceForbiddenAsmSymbolChars(tai_section(hp).name^),tai_section(hp).secorder,
+                     tai_section(hp).secalign,tai_section(hp).secflags,tai_section(hp).secprogbits)
                  else
                  else
-                   WriteSection(tai_section(hp).sectype,tai_section(hp).name^,tai_section(hp).secorder,tai_section(hp).secalign)
+                   WriteSection(tai_section(hp).sectype,tai_section(hp).name^,tai_section(hp).secorder,
+                     tai_section(hp).secalign,tai_section(hp).secflags,tai_section(hp).secprogbits)
                else
                else
                  begin
                  begin
 {$ifdef EXTDEBUG}
 {$ifdef EXTDEBUG}
@@ -738,8 +879,11 @@ implementation
                      processes). The alternate code creates some kind of common symbols
                      processes). The alternate code creates some kind of common symbols
                      in the data segment.
                      in the data segment.
                    }
                    }
+
                    if tai_datablock(hp).is_global then
                    if tai_datablock(hp).is_global then
                      begin
                      begin
+                       if tai_datablock(hp).sym.bind=AB_PRIVATE_EXTERN then
+                         WriteHiddenSymbol(tai_datablock(hp).sym);
                        writer.AsmWrite('.globl ');
                        writer.AsmWrite('.globl ');
                        writer.AsmWriteln(tai_datablock(hp).sym.name);
                        writer.AsmWriteln(tai_datablock(hp).sym.name);
                        writer.AsmWriteln('.data');
                        writer.AsmWriteln('.data');
@@ -818,6 +962,8 @@ implementation
                      begin
                      begin
                        if Tai_datablock(hp).is_global then
                        if Tai_datablock(hp).is_global then
                          begin
                          begin
+                           if (tai_datablock(hp).sym.bind=AB_PRIVATE_EXTERN) then
+                             WriteHiddenSymbol(tai_datablock(hp).sym);
                            writer.AsmWrite(#9'.globl ');
                            writer.AsmWrite(#9'.globl ');
                            if replaceforbidden then
                            if replaceforbidden then
                              writer.AsmWriteln(ReplaceForbiddenAsmSymbolChars(Tai_datablock(hp).sym.name))
                              writer.AsmWriteln(ReplaceForbiddenAsmSymbolChars(Tai_datablock(hp).sym.name))
@@ -884,7 +1030,44 @@ implementation
                         WriteAixIntConst(tai_const(hp));
                         WriteAixIntConst(tai_const(hp));
                       writer.AsmLn;
                       writer.AsmLn;
                     end;
                     end;
+                 aitconst_gottpoff:
+                   begin
+                     writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(gottpoff)+(.-'+tai_const(hp).endsym.name+tostr_with_plus(tai_const(hp).symofs)+')');
+                     writer.Asmln;
+                   end;
+                 aitconst_tlsgd:
+                   begin
+                     writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(tlsgd)+(.-'+tai_const(hp).endsym.name+tostr_with_plus(tai_const(hp).symofs)+')');
+                     writer.Asmln;
+                   end;
+                 aitconst_tlsdesc:
+                   begin
+                     writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(tlsdesc)+(.-'+tai_const(hp).endsym.name+tostr_with_plus(tai_const(hp).symofs)+')');
+                     writer.Asmln;
+                   end;
+                 aitconst_tpoff:
+                   begin
+                     if assigned(tai_const(hp).endsym) or (tai_const(hp).symofs<>0) then
+                       Internalerror(2019092805);
+                     writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(tpoff)');
+                     writer.Asmln;
+                   end;
 {$endif cpu64bitaddr}
 {$endif cpu64bitaddr}
+                 aitconst_dtpoff:
+                   begin
+                     { DTP-relative TLS offset of the symbol; the relocation
+                       specifier is spelled differently on every target }
+{$ifdef arm}
+                     writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(tlsldo)');
+                     writer.Asmln;
+{$endif arm}
+{$ifdef x86_64}
+                     writer.AsmWrite(#9'.long'#9+tai_const(hp).sym.name+'@dtpoff');
+                     writer.Asmln;
+{$endif x86_64}
+{$ifdef i386}
+                     { same spelling and size as on x86_64: the specifier is
+                       @dtpoff (not @tdpoff) and the relocation is 32 bit wide,
+                       so it needs .long; .word is only 16 bit on x86 }
+                     writer.AsmWrite(#9'.long'#9+tai_const(hp).sym.name+'@dtpoff');
+                     writer.Asmln;
+{$endif i386}
+                   end;
                  aitconst_got:
                  aitconst_got:
                    begin
                    begin
                      if tai_const(hp).symofs<>0 then
                      if tai_const(hp).symofs<>0 then
@@ -958,6 +1141,8 @@ implementation
                              WriteDecodedUleb128(qword(tai_const(hp).value));
                              WriteDecodedUleb128(qword(tai_const(hp).value));
                            aitconst_sleb128bit:
                            aitconst_sleb128bit:
                              WriteDecodedSleb128(int64(tai_const(hp).value));
                              WriteDecodedSleb128(int64(tai_const(hp).value));
+                           else
+                             ;
                          end
                          end
                        end
                        end
                      else
                      else
@@ -1111,14 +1296,6 @@ implementation
 
 
            ait_symbol :
            ait_symbol :
              begin
              begin
-               if (tai_symbol(hp).sym.bind=AB_PRIVATE_EXTERN) then
-                 begin
-                   writer.AsmWrite(#9'.private_extern ');
-                   if replaceforbidden then
-                     writer.AsmWriteln(ReplaceForbiddenAsmSymbolChars(tai_symbol(hp).sym.name))
-                   else
-                     writer.AsmWriteln(tai_symbol(hp).sym.name);
-                 end;
                if (target_info.system=system_powerpc64_linux) and
                if (target_info.system=system_powerpc64_linux) and
                   (tai_symbol(hp).sym.typ=AT_FUNCTION) and
                   (tai_symbol(hp).sym.typ=AT_FUNCTION) and
                   (cs_profile in current_settings.moduleswitches) then
                   (cs_profile in current_settings.moduleswitches) then
@@ -1131,6 +1308,8 @@ implementation
                     writer.AsmWriteln(ReplaceForbiddenAsmSymbolChars(tai_symbol(hp).sym.name))
                     writer.AsmWriteln(ReplaceForbiddenAsmSymbolChars(tai_symbol(hp).sym.name))
                   else
                   else
                     writer.AsmWriteln(tai_symbol(hp).sym.name);
                     writer.AsmWriteln(tai_symbol(hp).sym.name);
+                  if (tai_symbol(hp).sym.bind=AB_PRIVATE_EXTERN) then
+                    WriteHiddenSymbol(tai_symbol(hp).sym);
                 end;
                 end;
                if (target_info.system=system_powerpc64_linux) and
                if (target_info.system=system_powerpc64_linux) and
                   use_dotted_functions and
                   use_dotted_functions and
@@ -1211,14 +1390,26 @@ implementation
                if replaceforbidden then
                if replaceforbidden then
                  begin
                  begin
                    { avoid string truncation }
                    { avoid string truncation }
-                   writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_symbolpair(hp).sym^)+s);
+                   writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_symbolpair(hp).sym^));
+                   writer.AsmWrite(s);
                    writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_symbolpair(hp).value^));
                    writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_symbolpair(hp).value^));
+                   if tai_symbolpair(hp).kind=spk_set_global then
+                     begin
+                       writer.AsmWrite(#9'.globl ');
+                       writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_symbolpair(hp).sym^));
+                     end;
                  end
                  end
                else
                else
                  begin
                  begin
                    { avoid string truncation }
                    { avoid string truncation }
-                   writer.AsmWrite(tai_symbolpair(hp).sym^+s);
+                   writer.AsmWrite(tai_symbolpair(hp).sym^);
+                   writer.AsmWrite(s);
                    writer.AsmWriteLn(tai_symbolpair(hp).value^);
                    writer.AsmWriteLn(tai_symbolpair(hp).value^);
+                   if tai_symbolpair(hp).kind=spk_set_global then
+                     begin
+                       writer.AsmWrite(#9'.globl ');
+                       writer.AsmWriteLn(tai_symbolpair(hp).sym^);
+                     end;
                  end;
                  end;
              end;
              end;
            ait_symbol_end :
            ait_symbol_end :
@@ -1361,6 +1552,22 @@ implementation
                    std_regname(tai_varloc(hp).newlocation)));
                    std_regname(tai_varloc(hp).newlocation)));
                writer.AsmLn;
                writer.AsmLn;
              end;
              end;
+           ait_cfi:
+             begin
+               WriteCFI(tai_cfi_base(hp));
+             end;
+           ait_eabi_attribute:
+             begin
+               case tai_eabi_attribute(hp).eattr_typ of
+                 eattrtype_dword:
+                   writer.AsmWrite(#9'.eabi_attribute '+tostr(tai_eabi_attribute(hp).tag)+','+tostr(tai_eabi_attribute(hp).value));
+                 eattrtype_ntbs:
+                   writer.AsmWrite(#9'.eabi_attribute '+tostr(tai_eabi_attribute(hp).tag)+',"'+tai_eabi_attribute(hp).valuestr^+'"');
+                 else
+                   Internalerror(2019100601);
+               end;
+               writer.AsmLn;
+             end;
            else
            else
              internalerror(2006012201);
              internalerror(2006012201);
          end;
          end;
@@ -1388,7 +1595,29 @@ implementation
 
 
     procedure TGNUAssembler.WriteWeakSymbolRef(s: tasmsymbol);
     procedure TGNUAssembler.WriteWeakSymbolRef(s: tasmsymbol);
       begin
       begin
-        writer.AsmWriteLn(#9'.weak '+s.name);
+        writer.AsmWrite(#9'.weak ');
+        if asminfo^.dollarsign='$' then
+          writer.AsmWriteLn(s.name)
+        else
+          writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(s.name))
+      end;
+
+
+    procedure TGNUAssembler.WriteHiddenSymbol(sym: TAsmSymbol);
+      begin
+        { Emits the visibility directive for sym: '.private_extern' on Darwin,
+          '.hidden' elsewhere. Windows/(PE)COFF needs none: global symbols that
+          are not explicitly exported from an executable/library become hidden }
+        if target_info.system in systems_windows then
+          exit;
+        if target_info.system in systems_darwin then
+          writer.AsmWrite(#9'.private_extern ')
+        else
+          writer.AsmWrite(#9'.hidden ');
+        if asminfo^.dollarsign='$' then  { '$' dollarsign: the name is written as-is }
+          writer.AsmWriteLn(sym.name)
+        else
+          writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(sym.name))
       end;
       end;
 
 
 
 
@@ -1706,6 +1935,8 @@ implementation
                 result:='.section '+objc_section_name(atype);
                 result:='.section '+objc_section_name(atype);
                 exit
                 exit
               end;
               end;
+            else
+              ;
           end;
           end;
         result := inherited sectionname(atype,aname,aorder);
         result := inherited sectionname(atype,aname,aorder);
       end;
       end;
@@ -1812,7 +2043,9 @@ implementation
          sec_none (* sec_objc_nlcatlist *),
          sec_none (* sec_objc_nlcatlist *),
          sec_none (* sec_objc_protlist *),
          sec_none (* sec_objc_protlist *),
          sec_none (* sec_stack *),
          sec_none (* sec_stack *),
-         sec_none (* sec_heap *)
+         sec_none (* sec_heap *),
+         sec_none (* gcc_except_table *),
+         sec_none (* sec_arm_attribute *)
         );
         );
       begin
       begin
         Result := inherited SectionName (SecXTable [AType], AName, AOrder);
         Result := inherited SectionName (SecXTable [AType], AName, AOrder);

+ 22 - 5
compiler/aopt.pas

@@ -26,6 +26,7 @@ Unit aopt;
 {$i fpcdefs.inc}
 {$i fpcdefs.inc}
 
 
 { $define DEBUG_OPTALLOC}
 { $define DEBUG_OPTALLOC}
+{ $define DEBUG_INSTRUCTIONREGISTERDEPENDENCIES}
 
 
   Interface
   Interface
 
 
@@ -35,6 +36,9 @@ Unit aopt;
 
 
     Type
     Type
       TAsmOptimizer = class(TAoptObj)
       TAsmOptimizer = class(TAoptObj)
+        { Pooled object that can be used by optimisation procedures to evaluate
+          future register usage without upsetting the current state. }
+        TmpUsedRegs: TAllUsedRegs;
 
 
         { _AsmL is the TAsmList that has to be optimized }
         { _AsmL is the TAsmList that has to be optimized }
         Constructor create(_AsmL: TAsmList); virtual; reintroduce;
         Constructor create(_AsmL: TAsmList); virtual; reintroduce;
@@ -49,9 +53,9 @@ Unit aopt;
         { Builds a table with the locations of the labels in the TAsmList.
         { Builds a table with the locations of the labels in the TAsmList.
           Also fixes some RegDeallocs like "# %eax released; push (%eax)"  }
           Also fixes some RegDeallocs like "# %eax released; push (%eax)"  }
         Procedure BuildLabelTableAndFixRegAlloc;
         Procedure BuildLabelTableAndFixRegAlloc;
-        procedure clear;
       protected
       protected
         procedure pass_1;
         procedure pass_1;
+        procedure clear;
       End;
       End;
       TAsmOptimizerClass = class of TAsmOptimizer;
       TAsmOptimizerClass = class of TAsmOptimizer;
 
 
@@ -75,17 +79,19 @@ Unit aopt;
 
 
     uses
     uses
       cutils,
       cutils,
+      cprofile,
       globtype, globals,
       globtype, globals,
       verbose,
       verbose,
       cpubase,
       cpubase,
       cgbase,
       cgbase,
-      aoptda,aoptcpu,aoptcpud;
+      aoptcpu;
 
 
     Constructor TAsmOptimizer.create(_AsmL: TAsmList);
     Constructor TAsmOptimizer.create(_AsmL: TAsmList);
       Begin
       Begin
         inherited create(_asml,nil,nil,nil);
         inherited create(_asml,nil,nil,nil);
         { setup labeltable, always necessary }
         { setup labeltable, always necessary }
         New(LabelInfo);
         New(LabelInfo);
+        CreateUsedRegs(TmpUsedRegs);
       End;
       End;
 
 
     procedure TAsmOptimizer.FindLoHiLabels;
     procedure TAsmOptimizer.FindLoHiLabels;
@@ -142,6 +148,7 @@ Unit aopt;
           p := BlockStart;
           p := BlockStart;
           While (P <> BlockEnd) Do
           While (P <> BlockEnd) Do
             Begin
             Begin
+              prefetch(pointer(p.Next)^);
               Case p.typ Of
               Case p.typ Of
                 ait_Label:
                 ait_Label:
                   begin
                   begin
@@ -185,7 +192,6 @@ Unit aopt;
                       End
                       End
                     else if tai_regalloc(p).ratype=ra_dealloc then
                     else if tai_regalloc(p).ratype=ra_dealloc then
                       Begin
                       Begin
-                        ExcludeRegFromUsedRegs(tai_regalloc(p).Reg,Regs);
                         hp1 := p;
                         hp1 := p;
                         hp2 := nil;
                         hp2 := nil;
                         While Not(assigned(FindRegAlloc(tai_regalloc(p).Reg, tai(hp1.Next)))) And
                         While Not(assigned(FindRegAlloc(tai_regalloc(p).Reg, tai(hp1.Next)))) And
@@ -226,9 +232,13 @@ Unit aopt;
                             AsmL.remove(p);
                             AsmL.remove(p);
                             p.free;
                             p.free;
                             p := hp1;
                             p := hp1;
-                          end;
+                          end
+                        else
+                          ExcludeRegFromUsedRegs(tai_regalloc(p).Reg,Regs);
                       End
                       End
                   End
                   End
+                else
+                  ;
               End;
               End;
               P := tai(p.Next);
               P := tai(p.Next);
               While Assigned(p) and
               While Assigned(p) and
@@ -317,6 +327,7 @@ Unit aopt;
 
 
     Destructor TAsmOptimizer.Destroy;
     Destructor TAsmOptimizer.Destroy;
       Begin
       Begin
+        ReleaseUsedRegs(TmpUsedRegs);
         if assigned(LabelInfo^.LabelTable) then
         if assigned(LabelInfo^.LabelTable) then
           Freemem(LabelInfo^.LabelTable);
           Freemem(LabelInfo^.LabelTable);
         Dispose(LabelInfo);
         Dispose(LabelInfo);
@@ -337,6 +348,7 @@ Unit aopt;
         p:=BlockStart;
         p:=BlockStart;
         while p<>BlockEnd Do
         while p<>BlockEnd Do
           begin
           begin
+            prefetch(pointer(p.Next)^);
             if SchedulerPass1Cpu(p) then
             if SchedulerPass1Cpu(p) then
               continue;
               continue;
             p:=tai(p.next);
             p:=tai(p.next);
@@ -379,9 +391,14 @@ Unit aopt;
       var
       var
         p : TAsmOptimizer;
         p : TAsmOptimizer;
       begin
       begin
+        ResumeTimer(ct_aopt);
         p:=casmoptimizer.Create(AsmL);
         p:=casmoptimizer.Create(AsmL);
         p.Optimize;
         p.Optimize;
-        p.free
+{$ifdef DEBUG_INSTRUCTIONREGISTERDEPENDENCIES}
+        p.Debug_InsertInstrRegisterDependencyInfo;
+{$endif DEBUG_INSTRUCTIONREGISTERDEPENDENCIES}
+        p.free;
+        StopTimer;
       end;
       end;
 
 
 
 

+ 49 - 25
compiler/aoptbase.pas

@@ -49,9 +49,9 @@ unit aoptbase;
         { returns true if register Reg is used by instruction p1 }
         { returns true if register Reg is used by instruction p1 }
         Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;virtual;
         Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;virtual;
         { returns true if register Reg occurs in operand op }
         { returns true if register Reg occurs in operand op }
-        Function RegInOp(Reg: TRegister; const op: toper): Boolean;
+        class function RegInOp(Reg: TRegister; const op: toper): Boolean; static;
         { returns true if register Reg is used in the reference Ref }
         { returns true if register Reg is used in the reference Ref }
-        Function RegInRef(Reg: TRegister; Const Ref: TReference): Boolean;
+        class function RegInRef(Reg: TRegister; Const Ref: TReference): Boolean; static;
 
 
         function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;virtual;
         function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;virtual;
 
 
@@ -61,13 +61,13 @@ unit aoptbase;
         { gets the next tai object after current that contains info relevant }
         { gets the next tai object after current that contains info relevant }
         { to the optimizer in p1. If there is none, it returns false and     }
         { to the optimizer in p1. If there is none, it returns false and     }
         { sets p1 to nil                                                     }
         { sets p1 to nil                                                     }
-        class Function GetNextInstruction(Current: tai; Var Next: tai): Boolean;
-        { gets the previous tai object after current that contains info  }
-        { relevant to the optimizer in last. If there is none, it retuns }
-        { false and sets last to nil                                     }
-        Function GetLastInstruction(Current: tai; Var Last: tai): Boolean;
+        class function GetNextInstruction(Current: tai; out Next: tai): Boolean; static;
+        { gets the closest tai object before current that contains info   }
+        { relevant to the optimizer in last. If there is none, it returns }
+        { false and sets last to nil                                      }
+        class function GetLastInstruction(Current: tai; out Last: tai): Boolean; static;
 
 
-        function SkipEntryExitMarker(current: tai; var next: tai): boolean;
+        class function SkipEntryExitMarker(current: tai; out next: tai): boolean; static;
 
 
         { processor dependent methods }
         { processor dependent methods }
 
 
@@ -104,15 +104,15 @@ unit aoptbase;
 
 
         { compares reg1 and reg2 having the same type and being the same super registers
         { compares reg1 and reg2 having the same type and being the same super registers
           so the register size is neglected }
           so the register size is neglected }
-        function SuperRegistersEqual(reg1,reg2 : TRegister) : Boolean;
+        class function SuperRegistersEqual(reg1,reg2 : TRegister) : Boolean; static; {$ifdef USEINLINE}inline;{$endif}
     end;
     end;
 
 
-    function labelCanBeSkipped(p: tai_label): boolean;
+    function labelCanBeSkipped(p: tai_label): boolean; {$ifdef USEINLINE}inline;{$endif}
 
 
   implementation
   implementation
 
 
     uses
     uses
-      verbose,globtype,globals,aoptcpub;
+      verbose,globals,aoptcpub;
 
 
   constructor taoptbase.create;
   constructor taoptbase.create;
     begin
     begin
@@ -140,7 +140,7 @@ unit aoptbase;
     End;
     End;
 
 
 
 
-  Function TAOptBase.RegInOp(Reg: TRegister; const op: toper): Boolean;
+  class function TAOptBase.RegInOp(Reg: TRegister; const op: toper): Boolean;
     Begin
     Begin
       Case op.typ Of
       Case op.typ Of
         Top_Reg: RegInOp := SuperRegistersEqual(Reg,op.reg);
         Top_Reg: RegInOp := SuperRegistersEqual(Reg,op.reg);
@@ -154,12 +154,20 @@ unit aoptbase;
     End;
     End;
 
 
 
 
-  Function TAOptBase.RegInRef(Reg: TRegister; Const Ref: TReference): Boolean;
+  class function TAOptBase.RegInRef(Reg: TRegister; Const Ref: TReference): Boolean;
   Begin
   Begin
     RegInRef := SuperRegistersEqual(Ref.Base,Reg)
     RegInRef := SuperRegistersEqual(Ref.Base,Reg)
 {$ifdef cpurefshaveindexreg}
 {$ifdef cpurefshaveindexreg}
     Or SuperRegistersEqual(Ref.Index,Reg)
     Or SuperRegistersEqual(Ref.Index,Reg)
 {$endif cpurefshaveindexreg}
 {$endif cpurefshaveindexreg}
+{$ifdef x86}
+    or (Reg=Ref.segment)
+    { if Ref.segment isn't set, the cpu uses implicitly ss or ds, depending on the base register }
+    or ((Ref.segment=NR_NO) and (
+      ((Reg=NR_SS) and (SuperRegistersEqual(Ref.base,NR_EBP) or SuperRegistersEqual(Ref.base,NR_ESP))) or
+      ((Reg=NR_DS) and not(SuperRegistersEqual(Ref.base,NR_EBP) or SuperRegistersEqual(Ref.base,NR_ESP)))
+    ))
+{$endif x86}
   End;
   End;
 
 
   Function TAOptBase.RegModifiedByInstruction(Reg: TRegister; p1: tai): Boolean;
   Function TAOptBase.RegModifiedByInstruction(Reg: TRegister; p1: tai): Boolean;
@@ -168,26 +176,31 @@ unit aoptbase;
   End;
   End;
 
 
 
 
-  function labelCanBeSkipped(p: tai_label): boolean;
+  function labelCanBeSkipped(p: tai_label): boolean; {$ifdef USEINLINE}inline;{$endif}
   begin
   begin
     labelCanBeSkipped := not(p.labsym.is_used) or (p.labsym.labeltype<>alt_jump);
     labelCanBeSkipped := not(p.labsym.is_used) or (p.labsym.labeltype<>alt_jump);
   end;
   end;
 
 
 
 
-  class Function TAOptBase.GetNextInstruction(Current: tai; Var Next: tai): Boolean;
+  class function TAOptBase.GetNextInstruction(Current: tai; out Next: tai): Boolean;
   Begin
   Begin
     Repeat
     Repeat
       Current := tai(Current.Next);
       Current := tai(Current.Next);
       While Assigned(Current) And
       While Assigned(Current) And
             ((Current.typ In SkipInstr) or
             ((Current.typ In SkipInstr) or
-{$if defined(SPARC) or defined(MIPS)}
+{$ifdef cpudelayslot}
              ((Current.typ=ait_instruction) and
              ((Current.typ=ait_instruction) and
               (taicpu(Current).opcode=A_NOP)
               (taicpu(Current).opcode=A_NOP)
              ) or
              ) or
-{$endif SPARC or MIPS}
+{$endif cpudelayslot}
              ((Current.typ = ait_label) And
              ((Current.typ = ait_label) And
               labelCanBeSkipped(Tai_Label(Current)))) Do
               labelCanBeSkipped(Tai_Label(Current)))) Do
-        Current := tai(Current.Next);
+        begin
+          { this won't help the current loop, but it helps when returning from GetNextInstruction
+            as the next entry is probably already in the cache }
+          prefetch(pointer(Current.Next)^);
+          Current := Tai(Current.Next);
+        end;
       If Assigned(Current) And
       If Assigned(Current) And
          (Current.typ = ait_Marker) And
          (Current.typ = ait_Marker) And
          (Tai_Marker(Current).Kind = mark_NoPropInfoStart) Then
          (Tai_Marker(Current).Kind = mark_NoPropInfoStart) Then
@@ -195,7 +208,12 @@ unit aoptbase;
           While Assigned(Current) And
           While Assigned(Current) And
                 ((Current.typ <> ait_Marker) Or
                 ((Current.typ <> ait_Marker) Or
                  (Tai_Marker(Current).Kind <> mark_NoPropInfoEnd)) Do
                  (Tai_Marker(Current).Kind <> mark_NoPropInfoEnd)) Do
-            Current := Tai(Current.Next);
+            begin
+              { this won't help the current loop, but it helps when returning from GetNextInstruction
+                as the next entry is probably already in the cache }
+              prefetch(pointer(Current.Next)^);
+              Current := Tai(Current.Next);
+            end;
         End;
         End;
     Until Not(Assigned(Current)) Or
     Until Not(Assigned(Current)) Or
           (Current.typ <> ait_Marker) Or
           (Current.typ <> ait_Marker) Or
@@ -213,7 +231,7 @@ unit aoptbase;
         End;
         End;
   End;
   End;
 
 
-  Function TAOptBase.GetLastInstruction(Current: tai; Var Last: tai): Boolean;
+  class function TAOptBase.GetLastInstruction(Current: tai; out Last: tai): Boolean;
   Begin
   Begin
     Repeat
     Repeat
       Current := Tai(Current.previous);
       Current := Tai(Current.previous);
@@ -255,12 +273,12 @@ unit aoptbase;
   End;
   End;
 
 
 
 
-  function TAOptBase.SkipEntryExitMarker(current: tai; var next: tai): boolean;
+  class function TAOptBase.SkipEntryExitMarker(current: tai; out next: tai): boolean;
     begin
     begin
       result:=true;
       result:=true;
+      next:=current;
       if current.typ<>ait_marker then
       if current.typ<>ait_marker then
         exit;
         exit;
-      next:=current;
       while GetNextInstruction(next,next) do
       while GetNextInstruction(next,next) do
         begin
         begin
           if (next.typ<>ait_marker) or not(tai_marker(next).Kind in [mark_Position,mark_BlockStart]) then
           if (next.typ<>ait_marker) or not(tai_marker(next).Kind in [mark_Position,mark_BlockStart]) then
@@ -308,10 +326,16 @@ unit aoptbase;
     end;
     end;
 
 
 
 
-  function TAOptBase.SuperRegistersEqual(reg1,reg2 : TRegister) : Boolean;
+  class function TAOptBase.SuperRegistersEqual(reg1,reg2 : TRegister) : Boolean;{$ifdef USEINLINE}inline;{$endif}
   Begin
   Begin
-    Result:=(getregtype(reg1) = getregtype(reg2)) and
-            (getsupreg(reg1) = getsupreg(Reg2));
+    { Do an optimized version of
+
+      Result:=(getregtype(reg1) = getregtype(reg2)) and
+      (getsupreg(reg1) = getsupreg(Reg2));
+
+      as SuperRegistersEqual is used a lot
+    }
+    Result:=(DWord(reg1) and $ff00ffff)=(DWord(reg2) and $ff00ffff);
   end;
   end;
 
 
   { ******************* Processor dependent stuff *************************** }
   { ******************* Processor dependent stuff *************************** }

+ 2 - 2
compiler/aoptda.pas

@@ -29,7 +29,7 @@ Unit aoptda;
 
 
     uses
     uses
       cpubase,cgbase,
       cpubase,cgbase,
-      aasmbase,aasmtai,aasmdata,aasmcpu,
+      aasmtai,aasmdata,aasmcpu,
       aoptcpub, aoptbase;
       aoptcpub, aoptbase;
 
 
     Type
     Type
@@ -56,7 +56,7 @@ Unit aoptda;
   Implementation
   Implementation
 
 
     uses
     uses
-      globals, aoptobj;
+      globals;
 
 
     Procedure TAOptDFA.DoDFA;
     Procedure TAOptDFA.DoDFA;
     { Analyzes the Data Flow of an assembler list. Analyses the reg contents     }
     { Analyzes the Data Flow of an assembler list. Analyses the reg contents     }

File diff suppressed because it is too large
+ 1198 - 140
compiler/aoptobj.pas


+ 52 - 6
compiler/aoptutils.pas

@@ -27,23 +27,69 @@ unit aoptutils;
   interface
   interface
 
 
     uses
     uses
-      aasmtai,aasmcpu;
+      cpubase,aasmtai,aasmcpu;
 
 
     function MatchOpType(const p : taicpu;type0: toptype) : Boolean;
     function MatchOpType(const p : taicpu;type0: toptype) : Boolean;
     function MatchOpType(const p : taicpu;type0,type1 : toptype) : Boolean;
     function MatchOpType(const p : taicpu;type0,type1 : toptype) : Boolean;
+{$if max_operands>2}
+    function MatchOpType(const p : taicpu; type0,type1,type2 : toptype) : Boolean;
+{$endif max_operands>2}
+
+    { skips all labels and returns the next "real" instruction }
+    function SkipLabels(hp: tai; out hp2: tai): boolean;
+
+    { sets hp2 to hp and returns True if hp is not nil }
+    function SetAndTest(const hp: tai; out hp2: tai): Boolean;
 
 
   implementation
   implementation
 
 
-    function MatchOpType(const p : taicpu; type0: toptype) : Boolean;
+    uses
+      aasmbase;
+
+
+    function MatchOpType(const p : taicpu; type0: toptype) : Boolean; inline;
       begin
       begin
-        Result:=(p.oper[0]^.typ=type0);
+        Result:=(p.ops=1) and (p.oper[0]^.typ=type0);
       end;
       end;
 
 
 
 
-    function MatchOpType(const p : taicpu; type0,type1 : toptype) : Boolean;
+    function MatchOpType(const p : taicpu; type0,type1 : toptype) : Boolean; inline;
       begin
       begin
-        Result:=(p.oper[0]^.typ=type0) and (p.oper[0]^.typ=type1);
+        Result:=(p.ops=2) and (p.oper[0]^.typ=type0) and (p.oper[1]^.typ=type1);
       end;
       end;
 
 
-end.
 
 
+{$if max_operands>2}
+    { True only if p has exactly three operands and their types are, in
+      order, type0, type1 and type2 }
+    function MatchOpType(const p : taicpu; type0,type1,type2 : toptype) : Boolean; inline;
+      begin
+        Result:=False;
+        { do not look at any operand unless the operand count matches }
+        if p.ops<>3 then
+          exit;
+        Result:=(p.oper[0]^.typ=type0) and
+                (p.oper[1]^.typ=type1) and
+                (p.oper[2]^.typ=type2);
+      end;
+{$endif max_operands>2}
+
+
+    { Walks forward from hp over every following entry whose type is in
+      SkipInstr or is ait_label/ait_align.
+      Returns True and stores the first entry behind that run in hp2; if the
+      list ends first, returns False and stores the last entry reached in hp2 }
+    function SkipLabels(hp: tai; out hp2: tai): boolean;
+      var
+        following: tai;
+      begin
+        following := tai(hp.next);
+        while assigned(following) and
+              (following.typ in SkipInstr + [ait_label,ait_align]) do
+          begin
+            hp := following;
+            following := tai(hp.next);
+          end;
+        Result := assigned(following);
+        if Result then
+          hp2 := following
+        else
+          hp2 := hp;
+      end;
+
+    { copies hp into hp2; the result tells whether hp was non-nil }
+    function SetAndTest(const hp: tai; out hp2: tai): Boolean; inline;
+      begin
+        Result := hp <> nil;
+        hp2 := hp;
+      end;
+
+end.

+ 248 - 89
compiler/arm/aasmcpu.pas

@@ -76,6 +76,7 @@ uses
       OT_IMMTINY   = $00002100;
       OT_IMMTINY   = $00002100;
       OT_IMMSHIFTER= $00002200;
       OT_IMMSHIFTER= $00002200;
       OT_IMMEDIATEZERO = $10002200;
       OT_IMMEDIATEZERO = $10002200;
+      OT_IMMEDIATEMM     = $00002400;
       OT_IMMEDIATE24 = OT_IMM24;
       OT_IMMEDIATE24 = OT_IMM24;
       OT_SHIFTIMM  = OT_SHIFTEROP or OT_IMMSHIFTER;
       OT_SHIFTIMM  = OT_SHIFTEROP or OT_IMMSHIFTER;
       OT_SHIFTIMMEDIATE = OT_SHIFTIMM;
       OT_SHIFTIMMEDIATE = OT_SHIFTIMM;
@@ -137,6 +138,10 @@ uses
 
 
       IF_NONE   = $00000000;
       IF_NONE   = $00000000;
 
 
+      IF_EXTENSIONS = $0000000F;
+
+      IF_NEON       = $00000001;
+
       IF_ARMMASK    = $000F0000;
       IF_ARMMASK    = $000F0000;
       IF_ARM32      = $00010000;
       IF_ARM32      = $00010000;
       IF_THUMB      = $00020000;
       IF_THUMB      = $00020000;
@@ -197,9 +202,11 @@ uses
          roundingmode : troundingmode;
          roundingmode : troundingmode;
          procedure loadshifterop(opidx:longint;const so:tshifterop);
          procedure loadshifterop(opidx:longint;const so:tshifterop);
          procedure loadregset(opidx:longint; regsetregtype: tregistertype; regsetsubregtype: tsubregister; const s:tcpuregisterset; ausermode: boolean=false);
          procedure loadregset(opidx:longint; regsetregtype: tregistertype; regsetsubregtype: tsubregister; const s:tcpuregisterset; ausermode: boolean=false);
-         procedure loadconditioncode(opidx:longint;const cond:tasmcond);
+         procedure loadconditioncode(opidx:longint;const acond:tasmcond);
          procedure loadmodeflags(opidx:longint;const flags:tcpumodeflags);
          procedure loadmodeflags(opidx:longint;const flags:tcpumodeflags);
          procedure loadspecialreg(opidx:longint;const areg:tregister; const aflags:tspecialregflags);
          procedure loadspecialreg(opidx:longint;const areg:tregister; const aflags:tspecialregflags);
+         procedure loadrealconst(opidx:longint;const _value:bestreal);
+
          constructor op_none(op : tasmop);
          constructor op_none(op : tasmop);
 
 
          constructor op_reg(op : tasmop;_op1 : tregister);
          constructor op_reg(op : tasmop;_op1 : tregister);
@@ -237,6 +244,8 @@ uses
          { *M*LL }
          { *M*LL }
          constructor op_reg_reg_reg_reg(op : tasmop;_op1,_op2,_op3,_op4 : tregister);
          constructor op_reg_reg_reg_reg(op : tasmop;_op1,_op2,_op3,_op4 : tregister);
 
 
+         constructor op_reg_realconst(op : tasmop;_op1: tregister;_op2: bestreal);
+
          { this is for Jmp instructions }
          { this is for Jmp instructions }
          constructor op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
          constructor op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
 
 
@@ -332,6 +341,19 @@ implementation
       end;
       end;
 
 
 
 
+    procedure taicpu.loadrealconst(opidx:longint;const _value:bestreal); { turns operand opidx into a real-constant operand holding _value }
+      begin
+        allocate_oper(opidx+1); { requests opidx+1 operand slots; presumably guarantees that oper[opidx] is allocated - confirm in allocate_oper }
+        with oper[opidx]^ do
+          begin
+            if typ<>top_realconst then { slot currently holds a different operand kind, so clear it before reuse }
+              clearop(opidx);
+            val_real:=_value; { store the constant itself }
+            typ:=top_realconst; { tag the operand as a real constant }
+          end;
+      end;
+
+
     procedure taicpu.loadregset(opidx:longint; regsetregtype: tregistertype; regsetsubregtype: tsubregister; const s:tcpuregisterset; ausermode: boolean);
     procedure taicpu.loadregset(opidx:longint; regsetregtype: tregistertype; regsetsubregtype: tsubregister; const s:tcpuregisterset; ausermode: boolean);
       var
       var
         i : byte;
         i : byte;
@@ -363,19 +385,21 @@ implementation
                    if assigned(add_reg_instruction_hook) and (i in regset^) then
                    if assigned(add_reg_instruction_hook) and (i in regset^) then
                      add_reg_instruction_hook(self,newreg(R_MMREGISTER,i,regsetsubregtype));
                      add_reg_instruction_hook(self,newreg(R_MMREGISTER,i,regsetsubregtype));
                  end;
                  end;
+             else
+               internalerror(2019050932);
            end;
            end;
          end;
          end;
       end;
       end;
 
 
 
 
-    procedure taicpu.loadconditioncode(opidx:longint;const cond:tasmcond);
+    procedure taicpu.loadconditioncode(opidx:longint;const acond:tasmcond); { turns operand opidx into a condition-code operand holding acond; a slot of another kind is cleared first }
       begin
       begin
         allocate_oper(opidx+1);
         allocate_oper(opidx+1);
         with oper[opidx]^ do
         with oper[opidx]^ do
          begin
          begin
            if typ<>top_conditioncode then
            if typ<>top_conditioncode then
              clearop(opidx);
              clearop(opidx);
-           cc:=cond;
+           cc:=acond; { store the condition in the operand; the parameter was renamed from cond to acond in this change }
            typ:=top_conditioncode;
            typ:=top_conditioncode;
          end;
          end;
       end;
       end;
@@ -504,6 +528,15 @@ implementation
       end;
       end;
 
 
 
 
+    constructor taicpu.op_reg_realconst(op : tasmop; _op1 : tregister; _op2 : bestreal); { builds a two-operand instruction: register _op1 followed by real constant _op2 }
+      begin
+         inherited create(op); { create the instruction for opcode op via the inherited constructor }
+         ops:=2; { this instruction form has exactly two operands }
+         loadreg(0,_op1); { operand 0: the register }
+         loadrealconst(1,_op2); { operand 1: the floating point constant }
+      end;
+
+
      constructor taicpu.op_reg_reg_const(op : tasmop;_op1,_op2 : tregister; _op3: aint);
      constructor taicpu.op_reg_reg_const(op : tasmop;_op1,_op2 : tregister; _op3: aint);
        begin
        begin
          inherited create(op);
          inherited create(op);
@@ -803,7 +836,7 @@ implementation
           end
           end
         else
         else
           case opcode of
           case opcode of
-            A_ADC,A_ADD,A_AND,A_BIC,
+            A_ADC,A_ADD,A_AND,A_BIC,A_ORN,
             A_EOR,A_CLZ,A_RBIT,
             A_EOR,A_CLZ,A_RBIT,
             A_LDR,A_LDRB,A_LDRBT,A_LDRH,A_LDRSB,
             A_LDR,A_LDRB,A_LDRBT,A_LDRH,A_LDRSB,
             A_LDRSH,A_LDRT,
             A_LDRSH,A_LDRT,
@@ -832,6 +865,8 @@ implementation
             A_UXTB,A_UXTH,A_SXTB,A_SXTH,
             A_UXTB,A_UXTH,A_SXTB,A_SXTH,
             A_NEG,
             A_NEG,
             A_VABS,A_VADD,A_VCVT,A_VDIV,A_VLDR,A_VMOV,A_VMUL,A_VNEG,A_VSQRT,A_VSUB,
             A_VABS,A_VADD,A_VCVT,A_VDIV,A_VLDR,A_VMOV,A_VMUL,A_VNEG,A_VSQRT,A_VSUB,
+            A_VEOR,
+            A_VMRS,A_VMSR,
             A_MRS,A_MSR:
             A_MRS,A_MSR:
               if opnr=0 then
               if opnr=0 then
                 result:=operand_write
                 result:=operand_write
@@ -867,7 +902,9 @@ implementation
                 result := operand_read;
                 result := operand_read;
             //Thumb2
             //Thumb2
             A_LSL, A_LSR, A_ROR, A_ASR, A_SDIV, A_UDIV, A_MOVW, A_MOVT, A_MLS, A_BFI,
             A_LSL, A_LSR, A_ROR, A_ASR, A_SDIV, A_UDIV, A_MOVW, A_MOVT, A_MLS, A_BFI,
-            A_SMMLA,A_SMMLS:
+            A_QADD,
+            A_PKHTB,A_PKHBT,
+            A_SMMLA,A_SMMLS,A_SMUAD,A_SMUSD:
               if opnr in [0] then
               if opnr in [0] then
                 result:=operand_write
                 result:=operand_write
               else
               else
@@ -886,7 +923,10 @@ implementation
             A_STREX:
             A_STREX:
               result:=operand_write;
               result:=operand_write;
             else
             else
-              internalerror(200403151);
+              begin
+                writeln(opcode);
+                internalerror(200403151);
+              end;
           end;
           end;
       end;
       end;
 
 
@@ -1114,6 +1154,8 @@ implementation
                                           begin
                                           begin
                                             inc(extradataoffset,multiplier*(((tai_realconst(hp).savesize-4)+3) div 4));
                                             inc(extradataoffset,multiplier*(((tai_realconst(hp).savesize-4)+3) div 4));
                                           end;
                                           end;
+                                        else
+                                          ;
                                       end;
                                       end;
                                       { check if the same constant has been already inserted into the currently handled list,
                                       { check if the same constant has been already inserted into the currently handled list,
                                         if yes, reuse it }
                                         if yes, reuse it }
@@ -1123,8 +1165,9 @@ implementation
                                           while assigned(hp2) do
                                           while assigned(hp2) do
                                             begin
                                             begin
                                               if (hp2.typ=ait_const) and (tai_const(hp2).sym=tai_const(hp).sym)
                                               if (hp2.typ=ait_const) and (tai_const(hp2).sym=tai_const(hp).sym)
-                                                and (tai_const(hp2).value=tai_const(hp).value) and (tai(hp2.previous).typ=ait_label)
-                                              then
+                                                and (tai_const(hp2).value=tai_const(hp).value) and (tai(hp2.previous).typ=ait_label) and
+                                                { gottpoff and tlsgd symbols are PC relative, so we cannot reuse them }
+                                                (not(tai_const(hp2).consttype in [aitconst_gottpoff,aitconst_tlsgd,aitconst_tlsdesc])) then
                                                 begin
                                                 begin
                                                   with taicpu(curtai).oper[curop]^.ref^ do
                                                   with taicpu(curtai).oper[curop]^.ref^ do
                                                     begin
                                                     begin
@@ -1172,6 +1215,8 @@ implementation
                 begin
                 begin
                   inc(curinspos,multiplier*((tai_realconst(hp).savesize+3) div 4));
                   inc(curinspos,multiplier*((tai_realconst(hp).savesize+3) div 4));
                 end;
                 end;
+              else
+                ;
             end;
             end;
             { special case for case jump tables }
             { special case for case jump tables }
             penalty:=0;
             penalty:=0;
@@ -1189,9 +1234,9 @@ implementation
                       begin
                       begin
                         penalty:=multiplier;
                         penalty:=multiplier;
                         hp:=tai(hp.next);
                         hp:=tai(hp.next);
-                        { skip register allocations and comments inserted by the optimizer as well as a label
+                        { skip register allocations and comments inserted by the optimizer as well as a label and align
                           as jump tables for thumb might have }
                           as jump tables for thumb might have }
-                        while assigned(hp) and (hp.typ in [ait_comment,ait_regalloc,ait_label]) do
+                        while assigned(hp) and (hp.typ in [ait_comment,ait_regalloc,ait_label,ait_align]) do
                           hp:=tai(hp.next);
                           hp:=tai(hp.next);
                         while assigned(hp) and (hp.typ=ait_const) do
                         while assigned(hp) and (hp.typ=ait_const) do
                           begin
                           begin
@@ -1242,6 +1287,8 @@ implementation
                           or if we splitted them so split before }
                           or if we splitted them so split before }
                       CheckLimit(hp,4);
                       CheckLimit(hp,4);
                     end;
                     end;
+                  else
+                    ;
                 end;
                 end;
               end;
               end;
 
 
@@ -1396,8 +1443,11 @@ implementation
                               end;
                               end;
                           end;
                           end;
                       end;
                       end;
+                    else;
                   end;
                   end;
                 end;
                 end;
+              else
+                ;
             end;
             end;
 
 
             curtai:=tai(curtai.Next);
             curtai:=tai(curtai.Next);
@@ -1461,8 +1511,12 @@ implementation
                             taicpu(curtai).ops:=2;
                             taicpu(curtai).ops:=2;
                           end;
                           end;
                       end;
                       end;
+                    else
+                      ;
                   end;
                   end;
                 end;
                 end;
+              else
+                ;
             end;
             end;
 
 
             curtai:=tai(curtai.Next);
             curtai:=tai(curtai.Next);
@@ -1508,55 +1562,59 @@ implementation
           begin
           begin
             case curtai.typ of
             case curtai.typ of
               ait_instruction:
               ait_instruction:
-                if IsIT(taicpu(curtai).opcode) then
-                  begin
-                    levels := GetITLevels(taicpu(curtai).opcode);
-                    if levels < 4 then
-                      begin
-                        i:=levels;
-                        hp1:=tai(curtai.Next);
-                        while assigned(hp1) and
-                          (i > 0) do
-                          begin
-                            if hp1.typ=ait_instruction then
-                              begin
-                                dec(i);
-                                if (i = 0) and
-                                  mustbelast(hp1) then
-                                  begin
-                                    hp1:=nil;
-                                    break;
-                                  end;
-                              end;
-                            hp1:=tai(hp1.Next);
-                          end;
+                begin
+                  if IsIT(taicpu(curtai).opcode) then
+                    begin
+                      levels := GetITLevels(taicpu(curtai).opcode);
+                      if levels < 4 then
+                        begin
+                          i:=levels;
+                          hp1:=tai(curtai.Next);
+                          while assigned(hp1) and
+                            (i > 0) do
+                            begin
+                              if hp1.typ=ait_instruction then
+                                begin
+                                  dec(i);
+                                  if (i = 0) and
+                                    mustbelast(hp1) then
+                                    begin
+                                      hp1:=nil;
+                                      break;
+                                    end;
+                                end;
+                              hp1:=tai(hp1.Next);
+                            end;
 
 
-                        if assigned(hp1) then
-                          begin
-                            // We are pointing at the first instruction after the IT block
-                            while assigned(hp1) and
-                              (hp1.typ<>ait_instruction) do
-                                hp1:=tai(hp1.Next);
-
-                            if assigned(hp1) and
-                              (hp1.typ=ait_instruction) and
-                              IsIT(taicpu(hp1).opcode) then
-                              begin
-                                if (levels+GetITLevels(taicpu(hp1).opcode) <= 4) and
-                                  ((taicpu(curtai).oper[0]^.cc=taicpu(hp1).oper[0]^.cc) or
-                                   (taicpu(curtai).oper[0]^.cc=inverse_cond(taicpu(hp1).oper[0]^.cc))) then
-                                  begin
-                                    taicpu(curtai).opcode:=getMergedInstruction(taicpu(curtai).opcode,
-                                                                                taicpu(hp1).opcode,
-                                                                                taicpu(curtai).oper[0]^.cc=inverse_cond(taicpu(hp1).oper[0]^.cc));
+                          if assigned(hp1) then
+                            begin
+                              // We are pointing at the first instruction after the IT block
+                              while assigned(hp1) and
+                                (hp1.typ<>ait_instruction) do
+                                  hp1:=tai(hp1.Next);
+
+                              if assigned(hp1) and
+                                (hp1.typ=ait_instruction) and
+                                IsIT(taicpu(hp1).opcode) then
+                                begin
+                                  if (levels+GetITLevels(taicpu(hp1).opcode) <= 4) and
+                                    ((taicpu(curtai).oper[0]^.cc=taicpu(hp1).oper[0]^.cc) or
+                                     (taicpu(curtai).oper[0]^.cc=inverse_cond(taicpu(hp1).oper[0]^.cc))) then
+                                    begin
+                                      taicpu(curtai).opcode:=getMergedInstruction(taicpu(curtai).opcode,
+                                                                                  taicpu(hp1).opcode,
+                                                                                  taicpu(curtai).oper[0]^.cc=inverse_cond(taicpu(hp1).oper[0]^.cc));
 
 
-                                    list.Remove(hp1);
-                                    hp1.Free;
-                                  end;
-                              end;
-                          end;
-                      end;
-                  end;
+                                      list.Remove(hp1);
+                                      hp1.Free;
+                                    end;
+                                end;
+                            end;
+                        end;
+                    end;
+                end
+              else
+                ;
             end;
             end;
 
 
             curtai:=tai(curtai.Next);
             curtai:=tai(curtai.Next);
@@ -1583,6 +1641,8 @@ implementation
                       case taicpu(curtai).opcode of
                       case taicpu(curtai).opcode of
                         A_AND: taicpu(curtai).opcode:=A_BIC;
                         A_AND: taicpu(curtai).opcode:=A_BIC;
                         A_BIC: taicpu(curtai).opcode:=A_AND;
                         A_BIC: taicpu(curtai).opcode:=A_AND;
+                        else
+                          internalerror(2019050931);
                       end;
                       end;
                       taicpu(curtai).oper[2]^.val:=(not taicpu(curtai).oper[2]^.val) and $FFFFFFFF;
                       taicpu(curtai).oper[2]^.val:=(not taicpu(curtai).oper[2]^.val) and $FFFFFFFF;
                     end
                     end
@@ -1595,10 +1655,14 @@ implementation
                       case taicpu(curtai).opcode of
                       case taicpu(curtai).opcode of
                         A_ADD: taicpu(curtai).opcode:=A_SUB;
                         A_ADD: taicpu(curtai).opcode:=A_SUB;
                         A_SUB: taicpu(curtai).opcode:=A_ADD;
                         A_SUB: taicpu(curtai).opcode:=A_ADD;
+                        else
+                          internalerror(2019050930);
                       end;
                       end;
                       taicpu(curtai).oper[2]^.val:=-taicpu(curtai).oper[2]^.val;
                       taicpu(curtai).oper[2]^.val:=-taicpu(curtai).oper[2]^.val;
                     end;
                     end;
                 end;
                 end;
+              else
+                ;
             end;
             end;
 
 
             curtai:=tai(curtai.Next);
             curtai:=tai(curtai.Next);
@@ -1646,6 +1710,8 @@ implementation
                       end;
                       end;
                   end;
                   end;
                 end;
                 end;
+              else
+                ;
             end;
             end;
 
 
             curtai:=tai(curtai.Next);
             curtai:=tai(curtai.Next);
@@ -1671,6 +1737,7 @@ implementation
                            (taicpu(curtai).oper[2]^.typ=top_shifterop) then
                            (taicpu(curtai).oper[2]^.typ=top_shifterop) then
                           begin
                           begin
                             case taicpu(curtai).oper[2]^.shifterop^.shiftmode of
                             case taicpu(curtai).oper[2]^.shifterop^.shiftmode of
+                              SM_NONE: ;
                               SM_LSL: taicpu(curtai).opcode:=A_LSL;
                               SM_LSL: taicpu(curtai).opcode:=A_LSL;
                               SM_LSR: taicpu(curtai).opcode:=A_LSR;
                               SM_LSR: taicpu(curtai).opcode:=A_LSR;
                               SM_ASR: taicpu(curtai).opcode:=A_ASR;
                               SM_ASR: taicpu(curtai).opcode:=A_ASR;
@@ -1707,8 +1774,12 @@ implementation
                       begin
                       begin
                         taicpu(curtai).opcode:=A_SVC;
                         taicpu(curtai).opcode:=A_SVC;
                       end;
                       end;
+                    else
+                      ;
                   end;
                   end;
                 end;
                 end;
+              else
+                ;
             end;
             end;
 
 
             curtai:=tai(curtai.Next);
             curtai:=tai(curtai.Next);
@@ -2151,17 +2222,19 @@ implementation
 
 
         FPUMasks: array[tfputype] of longword =
         FPUMasks: array[tfputype] of longword =
           (
           (
-            IF_NONE,
-            IF_NONE,
-            IF_NONE,
-            IF_FPA,
-            IF_FPA,
-            IF_FPA,
-            IF_VFPv2,
-            IF_VFPv2 or IF_VFPv3,
-            IF_VFPv2 or IF_VFPv3,
-            IF_NONE,
-            IF_VFPv2 or IF_VFPv3 or IF_VFPv4
+            { fpu_none       } IF_NONE,
+            { fpu_soft       } IF_NONE,
+            { fpu_libgcc     } IF_NONE,
+            { fpu_fpa        } IF_FPA,
+            { fpu_fpa10      } IF_FPA,
+            { fpu_fpa11      } IF_FPA,
+            { fpu_vfpv2      } IF_VFPv2,
+            { fpu_vfpv3      } IF_VFPv2 or IF_VFPv3,
+            { fpu_neon_vfpv3 } IF_VFPv2 or IF_VFPv3 or IF_NEON,
+            { fpu_vfpv3_d16  } IF_VFPv2 or IF_VFPv3,
+            { fpu_fpv4_s16   } IF_NONE,
+            { fpu_vfpv4      } IF_VFPv2 or IF_VFPv3 or IF_VFPv4,
+            { fpu_neon_vfpv4 } IF_VFPv2 or IF_VFPv3 or IF_VFPv4 or IF_NEON
           );
           );
       begin
       begin
         fArmVMask:=Masks[current_settings.cputype] or FPUMasks[current_settings.fputype];
         fArmVMask:=Masks[current_settings.cputype] or FPUMasks[current_settings.fputype];
@@ -2363,6 +2436,10 @@ implementation
                 begin
                 begin
                   ot:=OT_MODEFLAGS;
                   ot:=OT_MODEFLAGS;
                 end;
                 end;
+              top_realconst:
+                begin
+                  ot:=OT_IMMEDIATEMM;
+                end;
               else
               else
                 internalerror(2004022623);
                 internalerror(2004022623);
             end;
             end;
@@ -2719,6 +2796,8 @@ implementation
         refoper : poper;
         refoper : poper;
         msb : longint;
         msb : longint;
         r: byte;
         r: byte;
+        singlerec : tcompsinglerec;
+        doublerec : tcompdoublerec;
 
 
       procedure setshifterop(op : byte);
       procedure setshifterop(op : byte);
         var
         var
@@ -2937,6 +3016,7 @@ implementation
           shift:=0;
           shift:=0;
           typ:=0;
           typ:=0;
           case oper[op]^.shifterop^.shiftmode of
           case oper[op]^.shifterop^.shiftmode of
+            SM_None: ;
             SM_LSL: begin typ:=0; shift:=oper[op]^.shifterop^.shiftimm; end;
             SM_LSL: begin typ:=0; shift:=oper[op]^.shifterop^.shiftimm; end;
             SM_LSR: begin typ:=1; shift:=oper[op]^.shifterop^.shiftimm; if shift=32 then shift:=0; end;
             SM_LSR: begin typ:=1; shift:=oper[op]^.shifterop^.shiftimm; if shift=32 then shift:=0; end;
             SM_ASR: begin typ:=2; shift:=oper[op]^.shifterop^.shiftimm; if shift=32 then shift:=0; end;
             SM_ASR: begin typ:=2; shift:=oper[op]^.shifterop^.shiftimm; if shift=32 then shift:=0; end;
@@ -2977,13 +3057,23 @@ implementation
                 begin
                 begin
                   currsym:=objdata.symbolref(oper[0]^.ref^.symbol);
                   currsym:=objdata.symbolref(oper[0]^.ref^.symbol);
 
 
-                  bytes:=bytes or (((oper[0]^.ref^.offset-8) shr 2) and $ffffff);
+                  { tlscall is not relative so ignore the offset }
+                  if oper[0]^.ref^.refaddr<>addr_tlscall then
+                    bytes:=bytes or (((oper[0]^.ref^.offset-8) shr 2) and $ffffff);
 
 
                   if (opcode<>A_BL) or (condition<>C_None) then
                   if (opcode<>A_BL) or (condition<>C_None) then
                     objdata.writereloc(aint(bytes),4,currsym,RELOC_RELATIVE_24)
                     objdata.writereloc(aint(bytes),4,currsym,RELOC_RELATIVE_24)
                   else
                   else
-                    objdata.writereloc(aint(bytes),4,currsym,RELOC_RELATIVE_CALL);
-
+                    case oper[0]^.ref^.refaddr of
+                      addr_pic:
+                        objdata.writereloc(aint(bytes),4,currsym,RELOC_ARM_CALL);
+                      addr_full:
+                        objdata.writereloc(aint(bytes),4,currsym,RELOC_RELATIVE_CALL);
+                      addr_tlscall:
+                        objdata.writereloc(aint(bytes),4,currsym,RELOC_TLS_CALL);
+                      else
+                        Internalerror(2019092903);
+                    end;
                   exit;
                   exit;
                 end;
                 end;
             end;
             end;
@@ -3881,36 +3971,76 @@ implementation
                   end;
                   end;
                 PF_F32:
                 PF_F32:
                   begin
                   begin
-                    if (getregtype(oper[0]^.reg)<>R_MMREGISTER) or
-                       (getregtype(oper[1]^.reg)<>R_MMREGISTER) then
+                    if (getregtype(oper[0]^.reg)<>R_MMREGISTER) then
                       Message(asmw_e_invalid_opcode_and_operands);
                       Message(asmw_e_invalid_opcode_and_operands);
 
 
+                    case oper[1]^.typ of
+                      top_realconst:
+                        begin
+                          if not(IsVFPFloatImmediate(s32real,oper[1]^.val_real)) then
+                            Message(asmw_e_invalid_opcode_and_operands);
+                          singlerec.value:=oper[1]^.val_real;
+                          singlerec:=tcompsinglerec(NtoLE(DWord(singlerec)));
+
+                          bytes:=bytes or ((singlerec.bytes[2] shr 3) and $f);
+                          bytes:=bytes or (DWord((singlerec.bytes[2] shr 7) and $1) shl 16) or (DWord(singlerec.bytes[3] and $3) shl 17) or (DWord((singlerec.bytes[3] shr 7) and $1) shl 19);
+                        end;
+                      top_reg:
+                        begin
+                          if getregtype(oper[1]^.reg)<>R_MMREGISTER then
+                            Message(asmw_e_invalid_opcode_and_operands);
+                          Rm:=getmmreg(oper[1]^.reg);
+                          bytes:=bytes or (((Rm and $1E) shr 1) shl 0);
+                          bytes:=bytes or ((Rm and $1) shl 5);
+                        end;
+                      else
+                        Message(asmw_e_invalid_opcode_and_operands);
+                    end;
                     Rd:=getmmreg(oper[0]^.reg);
                     Rd:=getmmreg(oper[0]^.reg);
-                    Rm:=getmmreg(oper[1]^.reg);
 
 
                     bytes:=bytes or (((Rd and $1E) shr 1) shl 12);
                     bytes:=bytes or (((Rd and $1E) shr 1) shl 12);
                     bytes:=bytes or ((Rd and $1) shl 22);
                     bytes:=bytes or ((Rd and $1) shl 22);
 
 
-                    bytes:=bytes or (((Rm and $1E) shr 1) shl 0);
-                    bytes:=bytes or ((Rm and $1) shl 5);
                   end;
                   end;
                 PF_F64:
                 PF_F64:
                   begin
                   begin
-                    if (getregtype(oper[0]^.reg)<>R_MMREGISTER) or
-                       (getregtype(oper[1]^.reg)<>R_MMREGISTER) then
+                    if (getregtype(oper[0]^.reg)<>R_MMREGISTER) then
                       Message(asmw_e_invalid_opcode_and_operands);
                       Message(asmw_e_invalid_opcode_and_operands);
 
 
+                    case oper[1]^.typ of
+                      top_realconst:
+                        begin
+                          if not(IsVFPFloatImmediate(s64real,oper[1]^.val_real)) then
+                            Message(asmw_e_invalid_opcode_and_operands);
+                          doublerec.value:=oper[1]^.val_real;
+                          doublerec:=tcompdoublerec(NtoLE(QWord(doublerec)));
+
+                          //      32c:       eeb41b00        vmov.f64        d1, #64 ; 0x40
+
+                          // 32c:       eeb61b00        vmov.f64        d1, #96 ; 0x60
+                          bytes:=bytes or (doublerec.bytes[6] and $f);
+                          bytes:=bytes or (DWord((doublerec.bytes[6] shr 4) and $7) shl 16) or (DWord((doublerec.bytes[7] shr 7) and $1) shl 19);
+                        end;
+                      top_reg:
+                        begin
+                          if getregtype(oper[1]^.reg)<>R_MMREGISTER then
+                            Message(asmw_e_invalid_opcode_and_operands);
+                          Rm:=getmmreg(oper[1]^.reg);
+                          bytes:=bytes or (Rm and $F);
+                          bytes:=bytes or ((Rm and $10) shl 1);
+                        end;
+                      else
+                        Message(asmw_e_invalid_opcode_and_operands);
+                    end;
                     Rd:=getmmreg(oper[0]^.reg);
                     Rd:=getmmreg(oper[0]^.reg);
-                    Rm:=getmmreg(oper[1]^.reg);
 
 
                     bytes:=bytes or (1 shl 8);
                     bytes:=bytes or (1 shl 8);
 
 
                     bytes:=bytes or ((Rd and $F) shl 12);
                     bytes:=bytes or ((Rd and $F) shl 12);
                     bytes:=bytes or (((Rd and $10) shr 4) shl 22);
                     bytes:=bytes or (((Rd and $10) shr 4) shl 22);
-
-                    bytes:=bytes or (Rm and $F);
-                    bytes:=bytes or ((Rm and $10) shl 1);
                   end;
                   end;
+                else
+                  Message(asmw_e_invalid_opcode_and_operands);
               end;
               end;
             end;
             end;
           #$41,#$91: // VMRS/VMSR
           #$41,#$91: // VMRS/VMSR
@@ -4071,6 +4201,8 @@ implementation
                         d:=(rd shr 4) and 1;
                         d:=(rd shr 4) and 1;
                         rd:=rd and $F;
                         rd:=rd and $F;
                       end;
                       end;
+                    else
+                      internalerror(2019050929);
                   end;
                   end;
 
 
                   m:=0;
                   m:=0;
@@ -4091,6 +4223,8 @@ implementation
                         m:=(rm shr 4) and 1;
                         m:=(rm shr 4) and 1;
                         rm:=rm and $F;
                         rm:=rm and $F;
                       end;
                       end;
+                    else
+                      internalerror(2019050928);
                   end;
                   end;
 
 
                   bytes:=bytes or (Rd shl 12);
                   bytes:=bytes or (Rd shl 12);
@@ -4107,6 +4241,8 @@ implementation
                     PF_F64S32,
                     PF_F64S32,
                     PF_F64U32:
                     PF_F64U32:
                       bytes:=bytes or (1 shl 8);
                       bytes:=bytes or (1 shl 8);
+                    else
+                      ;
                   end;
                   end;
 
 
                   if oppostfix in [PF_S32F32,PF_S32F64,PF_U32F32,PF_U32F64] then
                   if oppostfix in [PF_S32F32,PF_S32F64,PF_U32F32,PF_U32F64] then
@@ -4115,6 +4251,8 @@ implementation
                         PF_S32F64,
                         PF_S32F64,
                         PF_S32F32:
                         PF_S32F32:
                           bytes:=bytes or (1 shl 16);
                           bytes:=bytes or (1 shl 16);
+                        else
+                          ;
                       end;
                       end;
 
 
                       bytes:=bytes or (1 shl 18);
                       bytes:=bytes or (1 shl 18);
@@ -4185,9 +4323,9 @@ implementation
 
 
                         rn:=16;
                         rn:=16;
                       end;
                       end;
-                  else
-                    Rn:=0;
-                    message(asmw_e_invalid_opcode_and_operands);
+                    else
+                      Rn:=0;
+                      message(asmw_e_invalid_opcode_and_operands);
                   end;
                   end;
 
 
                   case oppostfix of
                   case oppostfix of
@@ -4199,10 +4337,10 @@ implementation
                         bytes:=bytes or (1 shl 8);
                         bytes:=bytes or (1 shl 8);
                         D:=(rd shr 4) and $1; Rd:=Rd and $F;
                         D:=(rd shr 4) and $1; Rd:=Rd and $F;
                       end;
                       end;
-                  else
-                    begin
-                      D:=rd and $1; Rd:=Rd shr 1;
-                    end;
+                    else
+                      begin
+                        D:=rd and $1; Rd:=Rd shr 1;
+                      end;
                   end;
                   end;
 
 
                   case oppostfix of
                   case oppostfix of
@@ -4211,6 +4349,8 @@ implementation
                     PF_F64U16,PF_F32U16,
                     PF_F64U16,PF_F32U16,
                     PF_F32U32,PF_F64U32:
                     PF_F32U32,PF_F64U32:
                       bytes:=bytes or (1 shl 16);
                       bytes:=bytes or (1 shl 16);
+                    else
+                      ;
                   end;
                   end;
 
 
                   if oppostfix in [PF_S32F32,PF_S32F64,PF_U32F32,PF_U32F64,PF_S16F32,PF_S16F64,PF_U16F32,PF_U16F64] then
                   if oppostfix in [PF_S32F32,PF_S32F64,PF_U32F32,PF_U32F64,PF_S16F32,PF_S16F64,PF_U16F32,PF_U16F64] then
@@ -4263,6 +4403,8 @@ implementation
                       bytes:=bytes or (1 shl 23);
                       bytes:=bytes or (1 shl 23);
                     PF_DB,PF_DBS,PF_DBD,PF_DBX:
                     PF_DB,PF_DBS,PF_DBD,PF_DBX:
                       bytes:=bytes or (2 shl 23);
                       bytes:=bytes or (2 shl 23);
+                    else
+                      ;
                   end;
                   end;
 
 
                   case oppostfix of
                   case oppostfix of
@@ -4271,6 +4413,8 @@ implementation
                         bytes:=bytes or (1 shl 8);
                         bytes:=bytes or (1 shl 8);
                         bytes:=bytes or (1 shl 0); // Offset is odd
                         bytes:=bytes or (1 shl 0); // Offset is odd
                       end;
                       end;
+                    else
+                      ;
                   end;
                   end;
 
 
                   dp_operation:=(oper[1]^.subreg=R_SUBFD);
                   dp_operation:=(oper[1]^.subreg=R_SUBFD);
@@ -4562,6 +4706,8 @@ implementation
                         bytes:=bytes or ((oper[2]^.val shr 2) and $7F);
                         bytes:=bytes or ((oper[2]^.val shr 2) and $7F);
                       end;
                       end;
                   end;
                   end;
+                else
+                  internalerror(2019050926);
               end;
               end;
             end;
             end;
           #$65: { Thumb load/store }
           #$65: { Thumb load/store }
@@ -4698,6 +4844,8 @@ implementation
                     else
                     else
                       bytes:=bytes or (getsupreg(oper[0]^.reg) shl 8);
                       bytes:=bytes or (getsupreg(oper[0]^.reg) shl 8);
                   end;
                   end;
+                else
+                  internalerror(2019050925);
               end;
               end;
             end;
             end;
           #$6A: { Thumb: IT }
           #$6A: { Thumb: IT }
@@ -5303,6 +5451,8 @@ implementation
               case oppostfix of
               case oppostfix of
                 PF_None,PF_IA,PF_FD: bytes:=bytes or ($1 shl 23);
                 PF_None,PF_IA,PF_FD: bytes:=bytes or ($1 shl 23);
                 PF_DB,PF_EA: bytes:=bytes or ($2 shl 23);
                 PF_DB,PF_EA: bytes:=bytes or ($2 shl 23);
+              else
+                message1(asmw_e_invalid_opcode_and_operands, '"Invalid Postfix"');
               end;
               end;
             end;
             end;
           #$8D: { Thumb-2: BL/BLX }
           #$8D: { Thumb-2: BL/BLX }
@@ -5450,9 +5600,13 @@ implementation
                     bytes:=bytes or (1 shl 24);
                     bytes:=bytes or (1 shl 24);
 
 
                   case oppostfix of
                   case oppostfix of
+                    PF_S: bytes:=bytes or (0 shl 22) or (0 shl 15);
                     PF_D: bytes:=bytes or (0 shl 22) or (1 shl 15);
                     PF_D: bytes:=bytes or (0 shl 22) or (1 shl 15);
                     PF_E: bytes:=bytes or (1 shl 22) or (0 shl 15);
                     PF_E: bytes:=bytes or (1 shl 22) or (0 shl 15);
                     PF_P: bytes:=bytes or (1 shl 22) or (1 shl 15);
                     PF_P: bytes:=bytes or (1 shl 22) or (1 shl 15);
+                    PF_EP: ;
+                    else
+                      message1(asmw_e_invalid_opcode_and_operands, '"Invalid postfix"');
                   end;
                   end;
                 end
                 end
               else
               else
@@ -5527,6 +5681,7 @@ implementation
                 end;
                 end;
 
 
               case roundingmode of
               case roundingmode of
+                RM_NONE: ;
                 RM_P: bytes:=bytes or (1 shl 5);
                 RM_P: bytes:=bytes or (1 shl 5);
                 RM_M: bytes:=bytes or (2 shl 5);
                 RM_M: bytes:=bytes or (2 shl 5);
                 RM_Z: bytes:=bytes or (3 shl 5);
                 RM_Z: bytes:=bytes or (3 shl 5);
@@ -5554,6 +5709,7 @@ implementation
                     bytes:=bytes or (getsupreg(oper[1]^.reg) shl 12);
                     bytes:=bytes or (getsupreg(oper[1]^.reg) shl 12);
 
 
                     case roundingmode of
                     case roundingmode of
+                      RM_NONE: ;
                       RM_P: bytes:=bytes or (1 shl 5);
                       RM_P: bytes:=bytes or (1 shl 5);
                       RM_M: bytes:=bytes or (2 shl 5);
                       RM_M: bytes:=bytes or (2 shl 5);
                       RM_Z: bytes:=bytes or (3 shl 5);
                       RM_Z: bytes:=bytes or (3 shl 5);
@@ -5573,6 +5729,7 @@ implementation
                     bytes:=bytes or (getsupreg(oper[1]^.reg) shl 0);
                     bytes:=bytes or (getsupreg(oper[1]^.reg) shl 0);
 
 
                     case roundingmode of
                     case roundingmode of
+                      RM_NONE: ;
                       RM_P: bytes:=bytes or (1 shl 5);
                       RM_P: bytes:=bytes or (1 shl 5);
                       RM_M: bytes:=bytes or (2 shl 5);
                       RM_M: bytes:=bytes or (2 shl 5);
                       RM_Z: bytes:=bytes or (3 shl 5);
                       RM_Z: bytes:=bytes or (3 shl 5);
@@ -5602,6 +5759,8 @@ implementation
                         Message(asmw_e_invalid_opcode_and_operands);
                         Message(asmw_e_invalid_opcode_and_operands);
                       end;
                       end;
                   end;
                   end;
+                else
+                  Message1(asmw_e_invalid_opcode_and_operands, '"Unsupported opcode"');
               end;
               end;
             end;
             end;
           #$fe: // No written data
           #$fe: // No written data

+ 53 - 15
compiler/arm/agarmgas.pas

@@ -49,6 +49,7 @@ unit agarmgas;
 
 
       TArmAppleGNUAssembler=class(TAppleGNUassembler)
       TArmAppleGNUAssembler=class(TAppleGNUassembler)
         constructor CreateWithWriter(info: pasminfo; wr: TExternalAssemblerOutputFile; freewriter, smart: boolean); override;
         constructor CreateWithWriter(info: pasminfo; wr: TExternalAssemblerOutputFile; freewriter, smart: boolean); override;
+        function MakeCmdLine: TCmdStr; override;
         procedure WriteExtraHeader; override;
         procedure WriteExtraHeader; override;
       end;
       end;
 
 
@@ -94,7 +95,9 @@ unit agarmgas;
       begin
       begin
         inherited;
         inherited;
         InstrWriter := TArmInstrWriter.create(self);
         InstrWriter := TArmInstrWriter.create(self);
+{$ifndef llvm}
         if GenerateThumb2Code then
         if GenerateThumb2Code then
+{$endif}
           TArmInstrWriter(InstrWriter).unified_syntax:=true;
           TArmInstrWriter(InstrWriter).unified_syntax:=true;
       end;
       end;
 
 
@@ -102,18 +105,26 @@ unit agarmgas;
     function TArmGNUAssembler.MakeCmdLine: TCmdStr;
     function TArmGNUAssembler.MakeCmdLine: TCmdStr;
       begin
       begin
         result:=inherited MakeCmdLine;
         result:=inherited MakeCmdLine;
-        if (current_settings.fputype = fpu_soft) then
-          result:='-mfpu=softvfp '+result;
-        if (current_settings.fputype = fpu_vfpv2) then
-          result:='-mfpu=vfpv2 '+result;
-        if (current_settings.fputype = fpu_vfpv3) then
-          result:='-mfpu=vfpv3 '+result;
-        if (current_settings.fputype = fpu_vfpv3_d16) then
-          result:='-mfpu=vfpv3-d16 '+result;
-        if (current_settings.fputype = fpu_fpv4_s16) then
-          result:='-mfpu=fpv4-sp-d16 '+result;
-        if (current_settings.fputype = fpu_vfpv4) then
-          result:='-mfpu=vfpv4 '+result;
+        case current_settings.fputype of
+          fpu_soft:
+            result:='-mfpu=softvfp '+result;
+          fpu_vfpv2:
+            result:='-mfpu=vfpv2 '+result;
+          fpu_vfpv3:
+            result:='-mfpu=vfpv3 '+result;
+          fpu_neon_vfpv3:
+            result:='-mfpu=neon-vfpv3 '+result;
+          fpu_vfpv3_d16:
+            result:='-mfpu=vfpv3-d16 '+result;
+          fpu_fpv4_s16:
+            result:='-mfpu=fpv4-sp-d16 '+result;
+          fpu_vfpv4:
+            result:='-mfpu=vfpv4 '+result;
+          fpu_neon_vfpv4:
+            result:='-mfpu=neon-vfpv4 '+result;
+          else
+            ;
+        end;
 
 
         if GenerateThumb2Code then
         if GenerateThumb2Code then
           result:='-march='+cputype_to_gas_march[current_settings.cputype]+' -mthumb -mthumb-interwork '+result
           result:='-march='+cputype_to_gas_march[current_settings.cputype]+' -mthumb -mthumb-interwork '+result
@@ -124,7 +135,11 @@ unit agarmgas;
 
 
         if target_info.abi = abi_eabihf then
         if target_info.abi = abi_eabihf then
           { options based on what gcc uses on debian armhf }
           { options based on what gcc uses on debian armhf }
-          result:='-mfloat-abi=hard -meabi=5 '+result;
+          result:='-mfloat-abi=hard -meabi=5 '+result
+        else if (target_info.abi = abi_eabi) and not(current_settings.fputype = fpu_soft) then
+          result:='-mfloat-abi=softfp -meabi=5 '+result
+        else if (target_info.abi = abi_eabi) and (current_settings.fputype = fpu_soft) then
+          result:='-mfloat-abi=soft -meabi=5 '+result;
       end;
       end;
 
 
     procedure TArmGNUAssembler.WriteExtraHeader;
     procedure TArmGNUAssembler.WriteExtraHeader;
@@ -146,6 +161,18 @@ unit agarmgas;
       end;
       end;
 
 
 
 
+    function TArmAppleGNUAssembler.MakeCmdLine: TCmdStr;
+      begin
+        result:=inherited MakeCmdLine;
+	if (asminfo^.id = as_clang) then
+          begin
+            if fputypestrllvm[current_settings.fputype] <> '' then
+              result:='-m'+fputypestrllvm[current_settings.fputype]+' '+result;
+            { Apple arm always uses softfp floating point ABI }
+            result:='-mfloat-abi=softfp '+result;
+          end;
+      end;
+
     procedure TArmAppleGNUAssembler.WriteExtraHeader;
     procedure TArmAppleGNUAssembler.WriteExtraHeader;
       begin
       begin
         inherited WriteExtraHeader;
         inherited WriteExtraHeader;
@@ -180,7 +207,9 @@ unit agarmgas;
                 if offset<>0 then
                 if offset<>0 then
                   s:=s+tostr_with_plus(offset);
                   s:=s+tostr_with_plus(offset);
                 if refaddr=addr_pic then
                 if refaddr=addr_pic then
-                  s:=s+'(PLT)';
+                  s:=s+'(PLT)'
+                else if refaddr=addr_tlscall then
+                  s:=s+'(tlscall)';
               end
               end
             else
             else
               begin
               begin
@@ -201,6 +230,8 @@ unit agarmgas;
                        s:=s+', rrx'
                        s:=s+', rrx'
                      else if shiftmode <> SM_None then
                      else if shiftmode <> SM_None then
                        s:=s+', '+gas_shiftmode2str[shiftmode]+' #'+tostr(shiftimm);
                        s:=s+', '+gas_shiftmode2str[shiftmode]+' #'+tostr(shiftimm);
+                     if offset<>0 then
+                       Internalerror(2019012601);
                   end
                   end
                 else if offset<>0 then
                 else if offset<>0 then
                   s:=s+', #'+tostr(offset);
                   s:=s+', #'+tostr(offset);
@@ -210,6 +241,8 @@ unit agarmgas;
                     s:=s+']';
                     s:=s+']';
                   AM_PREINDEXED:
                   AM_PREINDEXED:
                     s:=s+']!';
                     s:=s+']!';
+                  else
+                    ;
                 end;
                 end;
               end;
               end;
 
 
@@ -318,6 +351,11 @@ unit agarmgas;
                   if srF in o.specialflags then getopstr:=getopstr+'f';
                   if srF in o.specialflags then getopstr:=getopstr+'f';
                   if srS in o.specialflags then getopstr:=getopstr+'s';
                   if srS in o.specialflags then getopstr:=getopstr+'s';
                 end;
                 end;
+            end;
+          top_realconst:
+            begin
+              str(o.val_real,Result);
+              Result:='#'+Result;
             end
             end
           else
           else
             internalerror(2002070604);
             internalerror(2002070604);
@@ -402,7 +440,7 @@ unit agarmgas;
             idtxt  : 'AS';
             idtxt  : 'AS';
             asmbin : 'as';
             asmbin : 'as';
             asmcmd : '-o $OBJ $EXTRAOPT $ASM';
             asmcmd : '-o $OBJ $EXTRAOPT $ASM';
-            supported_targets : [system_arm_linux,system_arm_wince,system_arm_gba,system_arm_palmos,system_arm_nds,
+            supported_targets : [system_arm_linux,system_arm_netbsd,system_arm_wince,system_arm_gba,system_arm_palmos,system_arm_nds,
                                  system_arm_embedded,system_arm_symbian,system_arm_android,system_arm_aros];
                                  system_arm_embedded,system_arm_symbian,system_arm_android,system_arm_aros];
             flags : [af_needar,af_smartlink_sections];
             flags : [af_needar,af_smartlink_sections];
             labelprefix : '.L';
             labelprefix : '.L';

+ 233 - 41
compiler/arm/aoptcpu.pas

@@ -25,7 +25,7 @@ Unit aoptcpu;
 
 
 {$i fpcdefs.inc}
 {$i fpcdefs.inc}
 
 
-{$define DEBUG_PREREGSCHEDULER}
+{ $define DEBUG_PREREGSCHEDULER}
 {$define DEBUG_AOPTCPU}
 {$define DEBUG_AOPTCPU}
 
 
 Interface
 Interface
@@ -34,6 +34,9 @@ uses cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
 
 
 Type
 Type
   TCpuAsmOptimizer = class(TAsmOptimizer)
   TCpuAsmOptimizer = class(TAsmOptimizer)
+    { Can't be done in some cases due to the limited range of jumps }
+    function CanDoJumpOpts: Boolean; override;
+
     { uses the same constructor as TAopObj }
     { uses the same constructor as TAopObj }
     function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
     function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
     procedure PeepHoleOptPass2;override;
     procedure PeepHoleOptPass2;override;
@@ -83,6 +86,10 @@ Implementation
     cgobj,procinfo,
     cgobj,procinfo,
     aasmbase,aasmdata;
     aasmbase,aasmdata;
 
 
+{ Range check must be disabled explicitly as conversions between signed and unsigned
+  32-bit values are done without explicit typecasts }
+{$R-}
+
   function CanBeCond(p : tai) : boolean;
   function CanBeCond(p : tai) : boolean;
     begin
     begin
       result:=
       result:=
@@ -113,7 +120,9 @@ Implementation
         (r1.signindex = r2.signindex) and
         (r1.signindex = r2.signindex) and
         (r1.shiftimm = r2.shiftimm) and
         (r1.shiftimm = r2.shiftimm) and
         (r1.addressmode = r2.addressmode) and
         (r1.addressmode = r2.addressmode) and
-        (r1.shiftmode = r2.shiftmode);
+        (r1.shiftmode = r2.shiftmode) and
+        (r1.volatility=[]) and
+        (r2.volatility=[]);
     end;
     end;
 
 
   function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
   function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
@@ -218,7 +227,7 @@ Implementation
       if (p.opcode in [A_STR, A_LDM, A_STM, A_PLD,
       if (p.opcode in [A_STR, A_LDM, A_STM, A_PLD,
                           A_CMP, A_CMN, A_TST, A_TEQ,
                           A_CMP, A_CMN, A_TST, A_TEQ,
                           A_B, A_BL, A_BX, A_BLX,
                           A_B, A_BL, A_BX, A_BLX,
-                          A_SMLAL, A_UMLAL]) then i:=0;
+                          A_SMLAL, A_UMLAL, A_VSTM, A_VLDM]) then i:=0;
 
 
       while(i<p.ops) do
       while(i<p.ops) do
         begin
         begin
@@ -235,6 +244,8 @@ Implementation
               instructionLoadsFromReg :=
               instructionLoadsFromReg :=
                 (p.oper[I]^.ref^.base = reg) or
                 (p.oper[I]^.ref^.base = reg) or
                 (p.oper[I]^.ref^.index = reg);
                 (p.oper[I]^.ref^.index = reg);
+            else
+              ;
           end;
           end;
           if instructionLoadsFromReg then exit; {Bailout if we found something}
           if instructionLoadsFromReg then exit; {Bailout if we found something}
           Inc(I);
           Inc(I);
@@ -294,6 +305,8 @@ Implementation
         A_POP:
         A_POP:
           Result := (getsupreg(reg) in p.oper[0]^.regset^) or
           Result := (getsupreg(reg) in p.oper[0]^.regset^) or
                                    (reg=NR_STACK_POINTER_REG);
                                    (reg=NR_STACK_POINTER_REG);
+        else
+          ;
       end;
       end;
 
 
       if Result then
       if Result then
@@ -310,6 +323,8 @@ Implementation
           Result :=
           Result :=
             (taicpu(p).oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
             (taicpu(p).oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
             (taicpu(p).oper[0]^.ref^.base = reg);
             (taicpu(p).oper[0]^.ref^.base = reg);
+        else
+          ;
       end;
       end;
     end;
     end;
 
 
@@ -367,6 +382,16 @@ Implementation
     end;
     end;
 {$endif DEBUG_AOPTCPU}
 {$endif DEBUG_AOPTCPU}
 
 
+
+  function TCpuAsmOptimizer.CanDoJumpOpts: Boolean;
+    begin
+      { Cannot perform these jump optimisations if the ARM architecture has 16-bit thumb codes }
+      Result := not (
+        (current_settings.instructionset = is_thumb) and not (CPUARM_HAS_THUMB2 in cpu_capabilities[current_settings.cputype])
+      );
+    end;
+
+
   function TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
   function TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
     var
     var
       alloc,
       alloc,
@@ -433,6 +458,19 @@ Implementation
 
 
               { finally get rid of the mov }
               { finally get rid of the mov }
               taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
               taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
+              { Remove preindexing and postindexing for LDR in some cases.
+                For example:
+                  ldr	reg2,[reg1, xxx]!
+                  mov reg1,reg2
+                must be translated to:
+                  ldr	reg1,[reg1, xxx]
+
+                Preindexing must be removed there, since the same register is used as the base and as the target.
+                Such case is not allowed for ARM CPU and produces crash. }
+              if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref)
+                and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base)
+              then
+                taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;
               asml.remove(movp);
               asml.remove(movp);
               movp.free;
               movp.free;
             end;
             end;
@@ -447,9 +485,11 @@ Implementation
       hp1 : tai;
       hp1 : tai;
     begin
     begin
       Result:=false;
       Result:=false;
-      if (MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) or
-          ((taicpu(p).oppostfix in [PF_F64F32,PF_F64S16,PF_F64S32,PF_F64U16,PF_F64U32]) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F64])) or
-          ((taicpu(p).oppostfix in [PF_F32F64,PF_F32S16,PF_F32S32,PF_F32U16,PF_F32U32]) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F32]))
+      if ((MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
+           ((getregtype(taicpu(movp).oper[0]^.reg)=R_MMREGISTER) or (taicpu(p).opcode=A_VLDR))
+          ) or
+          (((taicpu(p).oppostfix in [PF_F64F32,PF_F64S16,PF_F64S32,PF_F64U16,PF_F64U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFD)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F64])) or
+          (((taicpu(p).oppostfix in [PF_F32F64,PF_F32S16,PF_F32S32,PF_F32U16,PF_F32U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFS)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F32]))
          ) and
          ) and
          (taicpu(movp).ops=2) and
          (taicpu(movp).ops=2) and
          MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
          MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
@@ -494,6 +534,17 @@ Implementation
                   IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
                   IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
                 end;
                 end;
 
 
+              { change
+                  vldr reg0,[reg1]
+                  vmov reg2,reg0
+                into
+                  ldr reg2,[reg1]
+
+                if reg2 is an int register
+              }
+              if (taicpu(p).opcode=A_VLDR) and (getregtype(taicpu(movp).oper[0]^.reg)=R_INTREGISTER) then
+                taicpu(p).opcode:=A_LDR;
+
               { finally get rid of the mov }
               { finally get rid of the mov }
               taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
               taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
               asml.remove(movp);
               asml.remove(movp);
@@ -627,7 +678,6 @@ Implementation
     var
     var
       hp1,hp2,hp3,hp4: tai;
       hp1,hp2,hp3,hp4: tai;
       i, i2: longint;
       i, i2: longint;
-      TmpUsedRegs: TAllUsedRegs;
       tempop: tasmop;
       tempop: tasmop;
       oldreg: tregister;
       oldreg: tregister;
       dealloc: tai_regalloc;
       dealloc: tai_regalloc;
@@ -654,6 +704,9 @@ Implementation
             if MatchInstruction(p, [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND,
             if MatchInstruction(p, [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND,
                                  A_RSB,A_RSC,A_SBC,A_MLA], [C_None], [PF_None]) and
                                  A_RSB,A_RSC,A_SBC,A_MLA], [C_None], [PF_None]) and
               GetNextInstruction(p, hp1) and
               GetNextInstruction(p, hp1) and
+              { mlas is only allowed in arm mode }
+              ((taicpu(p).opcode<>A_MLA) or
+               (current_settings.instructionset<>is_thumb)) and
               MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
               MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
               (taicpu(hp1).oper[1]^.typ = top_const) and
               (taicpu(hp1).oper[1]^.typ = top_const) and
               (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
               (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
@@ -910,7 +963,7 @@ Implementation
                           MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
                           MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
                           MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
                           MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
                           begin
                           begin
-                            CopyUsedRegs(TmpUsedRegs);
+                            TransferUsedRegs(TmpUsedRegs);
                             UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                             UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                             UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                             UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                             if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
                             if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
@@ -924,7 +977,6 @@ Implementation
                                 p:=hp2;
                                 p:=hp2;
                                 Result:=true;
                                 Result:=true;
                               end;
                               end;
-                            ReleaseUsedRegs(TmpUsedRegs);
                           end
                           end
                         { fold
                         { fold
                           mov reg1,reg0, shift imm1
                           mov reg1,reg0, shift imm1
@@ -1188,9 +1240,52 @@ Implementation
                       ....
                       ....
                     }
                     }
                     if (taicpu(p).ops = 2) and
                     if (taicpu(p).ops = 2) and
-                       GetNextInstruction(p,hp1) and
+                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                        (tai(hp1).typ = ait_instruction) then
                        (tai(hp1).typ = ait_instruction) then
                       begin
                       begin
+                        {
+                          This removes the mul from
+                          mov rX,0
+                          ...
+                          mul ...,rX,...
+                        }
+                        if false and (taicpu(p).oper[1]^.typ = top_const) and
+                          (taicpu(p).oper[1]^.val=0) and
+                          MatchInstruction(hp1, [A_MUL,A_MLA], [taicpu(p).condition], [taicpu(p).oppostfix]) and
+                          (((taicpu(hp1).oper[1]^.typ=top_reg) and MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^)) or
+                           ((taicpu(hp1).oper[2]^.typ=top_reg) and MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[2]^))) then
+                            begin
+                              TransferUsedRegs(TmpUsedRegs);
+                              UpdateUsedRegs(TmpUsedRegs, tai(p.next));
+                              UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
+                              DebugMsg('Peephole MovMUL/MLA2Mov0 done', p);
+                              if taicpu(hp1).opcode=A_MUL then
+                                taicpu(hp1).loadconst(1,0)
+                              else
+                                taicpu(hp1).loadreg(1,taicpu(hp1).oper[3]^.reg);
+                              taicpu(hp1).ops:=2;
+                              taicpu(hp1).opcode:=A_MOV;
+                              if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
+                                RemoveCurrentP(p);
+                              Result:=true;
+                              exit;
+                            end
+                        else if (taicpu(p).oper[1]^.typ = top_const) and
+                          (taicpu(p).oper[1]^.val=0) and
+                          MatchInstruction(hp1, A_MLA, [taicpu(p).condition], [taicpu(p).oppostfix]) and
+                          MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[3]^) then
+                            begin
+                              TransferUsedRegs(TmpUsedRegs);
+                              UpdateUsedRegs(TmpUsedRegs, tai(p.next));
+                              UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
+                              DebugMsg('Peephole MovMLA2MUL 1 done', p);
+                              taicpu(hp1).ops:=3;
+                              taicpu(hp1).opcode:=A_MUL;
+                              if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
+                                RemoveCurrentP(p);
+                              Result:=true;
+                              exit;
+                            end
                         {
                         {
                           This changes the very common
                           This changes the very common
                           mov r0, #0
                           mov r0, #0
@@ -1200,7 +1295,7 @@ Implementation
 
 
                           and removes all superfluous mov instructions
                           and removes all superfluous mov instructions
                         }
                         }
-                        if (taicpu(p).oper[1]^.typ = top_const) and
+                        else if (taicpu(p).oper[1]^.typ = top_const) and
                            (taicpu(hp1).opcode=A_STR) then
                            (taicpu(hp1).opcode=A_STR) then
                           while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and
                           while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and
                                 MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
                                 MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
@@ -1328,7 +1423,7 @@ Implementation
                         if taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
                         if taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
                           taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
                           taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
 
 
-                        dealloc:=FindRegDeAlloc(taicpu(p).oper[1]^.reg, taicpu(p.Next));
+                        dealloc:=FindRegDeAlloc(taicpu(p).oper[1]^.reg, tai(p.Next));
                         if Assigned(dealloc) then
                         if Assigned(dealloc) then
                           begin
                           begin
                             asml.remove(dealloc);
                             asml.remove(dealloc);
@@ -1434,6 +1529,9 @@ Implementation
                                   hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
                                   hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
                                        taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
                                        taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
                                        taicpu(p).oper[2]^.shifterop^);
                                        taicpu(p).oper[2]^.shifterop^);
+                              if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then
+                                AllocRegBetween(taicpu(p).oper[2]^.shifterop^.rs,p,hp1,UsedRegs);
+                              AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
                               asml.insertbefore(hp2, hp1);
                               asml.insertbefore(hp2, hp1);
                               GetNextInstruction(p, hp2);
                               GetNextInstruction(p, hp2);
                               asml.remove(p);
                               asml.remove(p);
@@ -1544,7 +1642,14 @@ Implementation
                 A_ORR,
                 A_ORR,
                 A_MLA,
                 A_MLA,
                 A_MLS,
                 A_MLS,
-                A_MUL:
+                A_MUL,
+                A_QADD,A_QADD16,A_QADD8,
+                A_QSUB,A_QSUB16,A_QSUB8,
+                A_QDADD,A_QDSUB,A_QASX,A_QSAX,
+                A_SHADD16,A_SHADD8,A_UHADD16,A_UHADD8,
+                A_SHSUB16,A_SHSUB8,A_UHSUB16,A_UHSUB8,
+                A_PKHTB,A_PKHBT,
+                A_SMUAD,A_SMUSD:
                   begin
                   begin
                         {
                         {
                           optimize
                           optimize
@@ -1573,6 +1678,7 @@ Implementation
                             if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
                             if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
                               begin
                               begin
                                 DebugMsg('Peephole AndAnd2And done', p);
                                 DebugMsg('Peephole AndAnd2And done', p);
+                                AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
                                 taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
                                 taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
                                 taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
                                 taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
                                 taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
                                 taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
@@ -1583,12 +1689,12 @@ Implementation
                             else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
                             else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
                               begin
                               begin
                                 DebugMsg('Peephole AndAnd2And done', hp1);
                                 DebugMsg('Peephole AndAnd2And done', hp1);
+                                AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
                                 taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
                                 taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
                                 taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
                                 taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
                                 taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
                                 taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
                                 GetNextInstruction(p, hp1);
                                 GetNextInstruction(p, hp1);
-                                asml.remove(p);
-                                p.free;
+                                RemoveCurrentP(p);
                                 p:=hp1;
                                 p:=hp1;
                                 Result:=true;
                                 Result:=true;
                               end;
                               end;
@@ -1613,9 +1719,9 @@ Implementation
                           begin
                           begin
                             DebugMsg('Peephole AndStrb2Strb done', p);
                             DebugMsg('Peephole AndStrb2Strb done', p);
                             taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
                             taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
+                            AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
                             GetNextInstruction(p, hp1);
                             GetNextInstruction(p, hp1);
-                            asml.remove(p);
-                            p.free;
+                            RemoveCurrentP(p);
                             p:=hp1;
                             p:=hp1;
                             result:=true;
                             result:=true;
                           end
                           end
@@ -1872,6 +1978,7 @@ Implementation
                           begin
                           begin
                             taicpu(hp1).opcode:=A_MLS;
                             taicpu(hp1).opcode:=A_MLS;
 
 
+
                             taicpu(hp1).loadreg(3,taicpu(hp1).oper[1]^.reg);
                             taicpu(hp1).loadreg(3,taicpu(hp1).oper[1]^.reg);
 
 
                             if taicpu(hp1).ops=2 then
                             if taicpu(hp1).ops=2 then
@@ -1882,11 +1989,12 @@ Implementation
                             taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
                             taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
 
 
                             DebugMsg('MulSub2MLS done', p);
                             DebugMsg('MulSub2MLS done', p);
+                            AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,UsedRegs);
+                            AllocRegBetween(taicpu(hp1).oper[2]^.reg,p,hp1,UsedRegs);
+                            AllocRegBetween(taicpu(hp1).oper[3]^.reg,p,hp1,UsedRegs);
 
 
                             taicpu(hp1).ops:=4;
                             taicpu(hp1).ops:=4;
-
-                            asml.remove(p);
-                            p.free;
+                            RemoveCurrentP(p);
                             p:=hp1;
                             p:=hp1;
                           end;
                           end;
 
 
@@ -1948,6 +2056,7 @@ Implementation
                       strb reg1,[...]
                       strb reg1,[...]
                     }
                     }
                     if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
                     if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
+                      (taicpu(p).ops=2) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
                       MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
                       assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
                       assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
@@ -1973,6 +2082,7 @@ Implementation
                       uxtb reg3,reg1
                       uxtb reg3,reg1
                     }
                     }
                     else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
                     else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
+                      (taicpu(p).ops=2) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
                       MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
                       (taicpu(hp1).ops = 2) and
                       (taicpu(hp1).ops = 2) and
@@ -1982,6 +2092,7 @@ Implementation
                       not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
                       not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
                       begin
                       begin
                         DebugMsg('Peephole UxtbUxth2Uxtb done', p);
                         DebugMsg('Peephole UxtbUxth2Uxtb done', p);
+                        AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
                         taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
                         taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
                         asml.remove(hp1);
                         asml.remove(hp1);
                         hp1.free;
                         hp1.free;
@@ -1996,6 +2107,7 @@ Implementation
                       uxtb reg3,reg1
                       uxtb reg3,reg1
                     }
                     }
                     else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
                     else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
+                      (taicpu(p).ops=2) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
                       MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
                       (taicpu(hp1).ops = 2) and
                       (taicpu(hp1).ops = 2) and
@@ -2005,6 +2117,7 @@ Implementation
                       not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
                       not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
                       begin
                       begin
                         DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
                         DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
+                        AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
                         taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
                         taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
                         asml.remove(hp1);
                         asml.remove(hp1);
                         hp1.free;
                         hp1.free;
@@ -2019,8 +2132,8 @@ Implementation
                       uxtb reg3,reg1
                       uxtb reg3,reg1
                     }
                     }
                     else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
                     else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
-                      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       (taicpu(p).ops=2) and
                       (taicpu(p).ops=2) and
+                      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
                       MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
                       (taicpu(hp1).ops=3) and
                       (taicpu(hp1).ops=3) and
                       (taicpu(hp1).oper[2]^.typ=top_const) and
                       (taicpu(hp1).oper[2]^.typ=top_const) and
@@ -2055,6 +2168,7 @@ Implementation
                       strh reg1,[...]
                       strh reg1,[...]
                     }
                     }
                     if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
                     if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
+                      (taicpu(p).ops=2) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
                       MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
                       RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
                       RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
@@ -2080,6 +2194,7 @@ Implementation
                       uxth reg3,reg1
                       uxth reg3,reg1
                     }
                     }
                     else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
                     else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
+                      (taicpu(p).ops=2) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
                       MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
                       (taicpu(hp1).ops=2) and
                       (taicpu(hp1).ops=2) and
@@ -2089,6 +2204,7 @@ Implementation
                       not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
                       not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
                       begin
                       begin
                         DebugMsg('Peephole UxthUxth2Uxth done', p);
                         DebugMsg('Peephole UxthUxth2Uxth done', p);
+                        AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
                         taicpu(hp1).opcode:=A_UXTH;
                         taicpu(hp1).opcode:=A_UXTH;
                         taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
                         taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
                         GetNextInstruction(p, hp1);
                         GetNextInstruction(p, hp1);
@@ -2106,6 +2222,7 @@ Implementation
                       uxth reg3,reg1
                       uxth reg3,reg1
                     }
                     }
                     else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
                     else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
+                      (taicpu(p).ops=2) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
                       MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
                       (taicpu(hp1).ops=3) and
                       (taicpu(hp1).ops=3) and
@@ -2213,6 +2330,31 @@ Implementation
                         DebugMsg('Peephole Bl2B done', p);
                         DebugMsg('Peephole Bl2B done', p);
                       end;
                       end;
                   end;
                   end;
+                A_VMOV:
+                  begin
+                    {
+                      change
+                      vmov reg0,reg1,reg2
+                      vmov reg1,reg2,reg0
+                      into
+                      vmov reg0,reg1,reg2
+
+                      can be applied regardless of whether reg0 or reg2 is the vfp register
+                    }
+                    if (taicpu(p).ops = 3) and
+                      GetNextInstruction(p, hp1) and
+                      MatchInstruction(hp1, A_VMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
+                      (taicpu(hp1).ops = 3) and
+                      MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[2]^) and
+                      MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[0]^) and
+                      MatchOperand(taicpu(p).oper[2]^, taicpu(hp1).oper[1]^) then
+                      begin
+                        asml.Remove(hp1);
+                        hp1.free;
+                        DebugMsg('Peephole VMovVMov2VMov done', p);
+                      end;
+                  end;
+                A_VLDR,
                 A_VADD,
                 A_VADD,
                 A_VMUL,
                 A_VMUL,
                 A_VDIV,
                 A_VDIV,
@@ -2226,8 +2368,12 @@ Implementation
                       RemoveSuperfluousVMov(p, hp1, 'VOpVMov2VOp') then
                       RemoveSuperfluousVMov(p, hp1, 'VOpVMov2VOp') then
                       Result:=true;
                       Result:=true;
                   end
                   end
+                else
+                  ;
               end;
               end;
           end;
           end;
+        else
+          ;
       end;
       end;
     end;
     end;
 
 
@@ -2277,7 +2423,9 @@ Implementation
                            (l<=4) and
                            (l<=4) and
                            CanBeCond(hp1) and
                            CanBeCond(hp1) and
                            { stop on labels }
                            { stop on labels }
-                           not(hp1.typ=ait_label) do
+                           not(hp1.typ=ait_label) and
+                           { stop at a B instruction; otherwise the BccB2Cond case could no longer be recognized }
+                           not((hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_B)) do
                            begin
                            begin
                               inc(l);
                               inc(l);
                               if MustBeLast(hp1) then
                               if MustBeLast(hp1) then
@@ -2312,6 +2460,7 @@ Implementation
                                       until not(assigned(hp1)) or
                                       until not(assigned(hp1)) or
                                         not(CanBeCond(hp1)) or
                                         not(CanBeCond(hp1)) or
                                         (hp1.typ=ait_label);
                                         (hp1.typ=ait_label);
+                                      DebugMsg('Peephole Bcc2Cond done',hp2);
                                       { wait with removing else GetNextInstruction could
                                       { wait with removing else GetNextInstruction could
                                         ignore the label if it was the only usage in the
                                         ignore the label if it was the only usage in the
                                         jump moved away }
                                         jump moved away }
@@ -2347,17 +2496,24 @@ Implementation
                                     (taicpu(hp2).condition=C_None) and
                                     (taicpu(hp2).condition=C_None) and
                                     { real label and jump, no further references to the
                                     { real label and jump, no further references to the
                                       label are allowed }
                                       label are allowed }
-                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
+                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
                                     FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                     FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                      begin
                                      begin
                                        l:=0;
                                        l:=0;
                                        { skip hp1 to <several moves 2> }
                                        { skip hp1 to <several moves 2> }
                                        GetNextInstruction(hp1, hp1);
                                        GetNextInstruction(hp1, hp1);
                                        while assigned(hp1) and
                                        while assigned(hp1) and
-                                         CanBeCond(hp1) do
+                                         CanBeCond(hp1) and
+                                         (l<=3) do
                                          begin
                                          begin
                                            inc(l);
                                            inc(l);
-                                           GetNextInstruction(hp1, hp1);
+                                           if MustBeLast(hp1) then
+                                             begin
+                                               GetNextInstruction(hp1, hp1);
+                                               break;
+                                             end
+                                           else
+                                             GetNextInstruction(hp1, hp1);
                                          end;
                                          end;
                                        { hp1 points to yyy: }
                                        { hp1 points to yyy: }
                                        if assigned(hp1) and
                                        if assigned(hp1) and
@@ -2370,32 +2526,35 @@ Implementation
                                             repeat
                                             repeat
                                               if hp1.typ=ait_instruction then
                                               if hp1.typ=ait_instruction then
                                                 taicpu(hp1).condition:=condition;
                                                 taicpu(hp1).condition:=condition;
-                                              GetNextInstruction(hp1,hp1);
+                                              if MustBeLast(hp1) then
+                                                begin
+                                                  GetNextInstruction(hp1, hp1);
+                                                  break;
+                                                end
+                                              else
+                                                GetNextInstruction(hp1, hp1);
                                             until not(assigned(hp1)) or
                                             until not(assigned(hp1)) or
-                                              not(CanBeCond(hp1));
+                                              not(CanBeCond(hp1)) or
+                                              ((hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_B));
                                             { hp2 is still at jmp yyy }
                                             { hp2 is still at jmp yyy }
                                             GetNextInstruction(hp2,hp1);
                                             GetNextInstruction(hp2,hp1);
-                                            { hp2 is now at xxx: }
+                                            { hp1 is now at xxx: }
                                             condition:=inverse_cond(condition);
                                             condition:=inverse_cond(condition);
                                             GetNextInstruction(hp1,hp1);
                                             GetNextInstruction(hp1,hp1);
                                             { hp1 is now at <several movs 2> }
                                             { hp1 is now at <several movs 2> }
                                             repeat
                                             repeat
-                                              taicpu(hp1).condition:=condition;
+                                              if hp1.typ=ait_instruction then
+                                                taicpu(hp1).condition:=condition;
                                               GetNextInstruction(hp1,hp1);
                                               GetNextInstruction(hp1,hp1);
                                             until not(assigned(hp1)) or
                                             until not(assigned(hp1)) or
                                               not(CanBeCond(hp1)) or
                                               not(CanBeCond(hp1)) or
                                               (hp1.typ=ait_label);
                                               (hp1.typ=ait_label);
-                                            {
-                                            asml.remove(hp1.next)
-                                            hp1.next.free;
-                                            asml.remove(hp1);
-                                            hp1.free;
-                                            }
+                                            DebugMsg('Peephole BccB2Cond done',hp3);
                                             { remove Bcc }
                                             { remove Bcc }
                                             tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                             tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                             asml.remove(hp3);
                                             asml.remove(hp3);
                                             hp3.free;
                                             hp3.free;
-                                            { remove jmp }
+                                            { remove B }
                                             tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                             tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                             asml.remove(hp2);
                                             asml.remove(hp2);
                                             hp2.free;
                                             hp2.free;
@@ -2405,8 +2564,12 @@ Implementation
                                 end;
                                 end;
                            end;
                            end;
                       end;
                       end;
+                  else
+                    ;
                 end;
                 end;
               end;
               end;
+            else
+              ;
           end;
           end;
           p := tai(p.next)
           p := tai(p.next)
         end;
         end;
@@ -2443,7 +2606,7 @@ Implementation
           exit;
           exit;
         regtype:=getregtype(reg);
         regtype:=getregtype(reg);
         supreg:=getsupreg(reg);
         supreg:=getsupreg(reg);
-        if (cg.rg[regtype].live_end[supreg]=hp1) and
+        if assigned(cg.rg[regtype]) and (cg.rg[regtype].live_end[supreg]=hp1) and
           RegInInstruction(reg,p) then
           RegInInstruction(reg,p) then
           cg.rg[regtype].live_end[supreg]:=p;
           cg.rg[regtype].live_end[supreg]:=p;
       end;
       end;
@@ -2458,7 +2621,7 @@ Implementation
           exit;
           exit;
         regtype:=getregtype(reg);
         regtype:=getregtype(reg);
         supreg:=getsupreg(reg);
         supreg:=getsupreg(reg);
-        if (cg.rg[regtype].live_start[supreg]=p) and
+        if assigned(cg.rg[regtype]) and (cg.rg[regtype].live_start[supreg]=p) and
           RegInInstruction(reg,hp1) then
           RegInInstruction(reg,hp1) then
          cg.rg[regtype].live_start[supreg]:=hp1;
          cg.rg[regtype].live_start[supreg]:=hp1;
       end;
       end;
@@ -2486,6 +2649,8 @@ Implementation
             for r:=RS_R0 to RS_R15 do
             for r:=RS_R0 to RS_R15 do
                if r in p.oper[i]^.regset^ then
                if r in p.oper[i]^.regset^ then
                  CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
                  CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
+          else
+            ;
         end;
         end;
 
 
       { if live of any reg used by hp1 ends at hp1 and p uses this register then
       { if live of any reg used by hp1 ends at hp1 and p uses this register then
@@ -2505,6 +2670,8 @@ Implementation
             for r:=RS_R0 to RS_R15 do
             for r:=RS_R0 to RS_R15 do
                if r in hp1.oper[i]^.regset^ then
                if r in hp1.oper[i]^.regset^ then
                  CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
                  CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
+          else
+            ;
         end;
         end;
     end;
     end;
 
 
@@ -2513,6 +2680,15 @@ Implementation
 
 
   { TODO : schedule also forward }
   { TODO : schedule also forward }
   { TODO : schedule distance > 1 }
   { TODO : schedule distance > 1 }
+
+    { Returns true if p might be a load of a PC-relative TLS offset: an LDR whose reference operand pairs NR_PC (as base or as index) with another register. }
+    function PossibleTLSLoad(const p: tai) : boolean;
+      begin
+        Result:=(p.typ=ait_instruction) and (taicpu(p).opcode=A_LDR) and (taicpu(p).oper[1]^.typ=top_ref) and (((taicpu(p).oper[1]^.ref^.base=NR_PC) and
+          (taicpu(p).oper[1]^.ref^.index<>NR_NO)) or ((taicpu(p).oper[1]^.ref^.base<>NR_NO) and
+          (taicpu(p).oper[1]^.ref^.index=NR_PC)));
+      end;
+
     var
     var
       hp1,hp2,hp3,hp4,hp5,insertpos : tai;
       hp1,hp2,hp3,hp4,hp5,insertpos : tai;
       list : TAsmList;
       list : TAsmList;
@@ -2553,7 +2729,11 @@ Implementation
             ) and
             ) and
             GetNextInstruction(hp1,hp2) and
             GetNextInstruction(hp1,hp2) and
             (hp2.typ=ait_instruction) and
             (hp2.typ=ait_instruction) and
-            { loaded register used by next instruction? }
+            { loaded register used by next instruction?
+
+              if labels are ever supported here (in theory they could be skipped), the gnu2 tls general-dynamic code could break (the ldr before
+              the bl must not be scheduled away from the bl), so that case would then need to be handled explicitly
+            }
             (RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
             (RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
             { loaded register not used by previous instruction? }
             { loaded register not used by previous instruction? }
             not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and
             not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and
@@ -2569,7 +2749,9 @@ Implementation
             ) and
             ) and
             { if we modify the basereg AND the first instruction used that reg, we can not schedule }
             { if we modify the basereg AND the first instruction used that reg, we can not schedule }
             ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) or
             ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) or
-             not(instructionLoadsFromReg(taicpu(hp1).oper[1]^.ref^.base,p))) then
+             not(instructionLoadsFromReg(taicpu(hp1).oper[1]^.ref^.base,p))) and
+            not(PossibleTLSLoad(p)) and
+            not(PossibleTLSLoad(hp1)) then
             begin
             begin
               hp3:=tai(p.Previous);
               hp3:=tai(p.Previous);
               hp5:=tai(p.next);
               hp5:=tai(p.next);
@@ -2690,7 +2872,11 @@ Implementation
                       A_ITETT:
                       A_ITETT:
                         if l=4 then taicpu(hp).opcode := A_ITET;
                         if l=4 then taicpu(hp).opcode := A_ITET;
                       A_ITTTT:
                       A_ITTTT:
-                        if l=4 then taicpu(hp).opcode := A_ITTT;
+                        begin
+                          if l=4 then taicpu(hp).opcode := A_ITTT;
+                        end
+                      else
+                        ;
                     end;
                     end;
 
 
                   break;
                   break;
@@ -2921,8 +3107,12 @@ Implementation
                                 end;
                                 end;
                            end;
                            end;
                       end;
                       end;
+                  else
+                    ;
                 end;
                 end;
               end;
               end;
+            else
+              ;
           end;
           end;
           p := tai(p.next)
           p := tai(p.next)
         end;
         end;
@@ -3073,6 +3263,8 @@ Implementation
                 SM_LSR: taicpu(p).opcode:=A_LSR;
                 SM_LSR: taicpu(p).opcode:=A_LSR;
                 SM_ASR: taicpu(p).opcode:=A_ASR;
                 SM_ASR: taicpu(p).opcode:=A_ASR;
                 SM_ROR: taicpu(p).opcode:=A_ROR;
                 SM_ROR: taicpu(p).opcode:=A_ROR;
+                else
+                  internalerror(2019050912);
               end;
               end;
 
 
               if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then
               if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then

+ 19 - 6
compiler/arm/aoptcpub.pas

@@ -76,10 +76,6 @@ Const
 
 
   MaxCh = 3;
   MaxCh = 3;
 
 
-{ the maximum number of operands an instruction has }
-
-  MaxOps = 4;
-
 {Oper index of operand that contains the source (reference) with a load }
 {Oper index of operand that contains the source (reference) with a load }
 {instruction                                                            }
 {instruction                                                            }
 
 
@@ -123,14 +119,31 @@ Implementation
       i : Longint;
       i : Longint;
     begin
     begin
       result:=false;
       result:=false;
+      case taicpu(p1).opcode of
+        A_LDR:
+          begin
+            { special handling for LDRD }
+            if (taicpu(p1).oppostfix=PF_D) and (getsupreg(taicpu(p1).oper[0]^.reg)+1=getsupreg(Reg)) then
+              begin
+                result:=true;
+                exit;
+              end;
+          end;
+        else
+          ;
+      end;
       for i:=0 to taicpu(p1).ops-1 do
       for i:=0 to taicpu(p1).ops-1 do
         case taicpu(p1).oper[i]^.typ of
         case taicpu(p1).oper[i]^.typ of
           top_reg:
           top_reg:
             if (taicpu(p1).oper[i]^.reg=Reg) and (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
             if (taicpu(p1).oper[i]^.reg=Reg) and (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
               exit(true);
               exit(true);
           top_ref:
           top_ref:
-            if (taicpu(p1).spilling_get_operation_type_ref(i,Reg)<>operand_read) then
-              exit(true);
+            begin
+              if (taicpu(p1).spilling_get_operation_type_ref(i,Reg)<>operand_read) then
+                exit(true);
+            end
+          else
+            ;
         end;
         end;
     end;
     end;
 
 

+ 1 - 0
compiler/arm/armatt.inc

@@ -324,6 +324,7 @@
 'svc',
 'svc',
 'bxj',
 'bxj',
 'udf',
 'udf',
+'veor',
 'tan',
 'tan',
 'sqt',
 'sqt',
 'suf',
 'suf',

+ 1 - 0
compiler/arm/armatts.inc

@@ -353,5 +353,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
 attsufNONE
 attsufNONE
 );
 );

+ 15 - 1
compiler/arm/armins.dat

@@ -321,6 +321,7 @@ reg32,memam2              \x17\x04\x50                   ARM32,ARMv4
 reglo,memam3              \x65\x58\x0\2                  THUMB,ARMv4T
 reglo,memam3              \x65\x58\x0\2                  THUMB,ARMv4T
 reglo,memam4              \x66\x68\x0\2                  THUMB,ARMv4T
 reglo,memam4              \x66\x68\x0\2                  THUMB,ARMv4T
 reglo,memam5              \x67\x98\x0\2                  THUMB,ARMv4T
 reglo,memam5              \x67\x98\x0\2                  THUMB,ARMv4T
+reglo,memam2              \x67\x98\x0\2                  THUMB,ARMv4T
 reglo,memam6              \x67\x48\x0\2                  THUMB,ARMv4T
 reglo,memam6              \x67\x48\x0\2                  THUMB,ARMv4T
 reg32,memam2              \x88\xF8\x50\x0\x0\0           THUMB32,WIDE,ARMv6T2
 reg32,memam2              \x88\xF8\x50\x0\x0\0           THUMB32,WIDE,ARMv6T2
 reg32,memam2              \x17\x04\x10                   ARM32,ARMv4
 reg32,memam2              \x17\x04\x10                   ARM32,ARMv4
@@ -402,6 +403,7 @@ reg32,regf          \x10\x01\x0F                        ARM32,ARMv4
 regf,reg32          \x96\xF3\x80\x80\x0                 THUMB32,ARMv6
 regf,reg32          \x96\xF3\x80\x80\x0                 THUMB32,ARMv6
 
 
 regf,reg32          \x12\x01\x20\xF0                    ARM32,ARMv4
 regf,reg32          \x12\x01\x20\xF0                    ARM32,ARMv4
+regs,reg32          \x12\x01\x20\xF0                    ARM32,ARMv4
 regf,immshifter     \x13\x03\x20\xF0                    ARM32,ARMv4
 regf,immshifter     \x13\x03\x20\xF0                    ARM32,ARMv4
 regs,immshifter     \x13\x03\x20\xF0                    ARM32,ARMv4
 regs,immshifter     \x13\x03\x20\xF0                    ARM32,ARMv4
 
 
@@ -428,7 +430,9 @@ reg32,immshifter       \xB\x1\xE0                       ARM32,ARMv4
 
 
 [VMOVcc]
 [VMOVcc]
 vreg,vreg         \x90\xEE\xB0\xA\x40            THUMB32,VFPv2
 vreg,vreg         \x90\xEE\xB0\xA\x40            THUMB32,VFPv2
-vreg,vreg         \x40\xE\xB0\xA\x40            ARM32,VFPv2
+vreg,vreg         \x40\xE\xB0\xA\x40             ARM32,VFPv2
+vreg,immmm         \x90\xEE\xB0\xA\x0             THUMB32,VFPv3
+vreg,immmm         \x40\xE\xB0\xA\x0              ARM32,VFPv3
 
 
 reg32,vreg        \x90\xEE\x10\xA\x10            THUMB32,VFPv2
 reg32,vreg        \x90\xEE\x10\xA\x10            THUMB32,VFPv2
 vreg,reg32        \x90\xEE\x00\xA\x10            THUMB32,VFPv2
 vreg,reg32        \x90\xEE\x00\xA\x10            THUMB32,VFPv2
@@ -540,6 +544,7 @@ reg32,reglist		          \x26\x80			   ARM32,ARMv4
 reglo,memam3                \x65\x50\x0\2                  THUMB,ARMv4T
 reglo,memam3                \x65\x50\x0\2                  THUMB,ARMv4T
 reglo,memam4                \x66\x60\x0\2                  THUMB,ARMv4T
 reglo,memam4                \x66\x60\x0\2                  THUMB,ARMv4T
 reglo,memam5                \x67\x90\x0\2                  THUMB,ARMv4T
 reglo,memam5                \x67\x90\x0\2                  THUMB,ARMv4T
+reglo,memam2                \x67\x90\x0\2                  THUMB,ARMv4T
 reg32,memam2                \x88\xF8\x40\x0\x0\0           THUMB32,WIDE,ARMv6T2
 reg32,memam2                \x88\xF8\x40\x0\x0\0           THUMB32,WIDE,ARMv6T2
 reg32,memam2                \x17\x04\x00                   ARM32,ARMv4
 reg32,memam2                \x17\x04\x00                   ARM32,ARMv4
 
 
@@ -1481,8 +1486,12 @@ vreg,vreg               \x43\xEE\xB8\xA\x40          THUMB32,VFPv2
 vreg,vreg               \x43\xE\xB8\xA\x40           ARM32,VFPv2
 vreg,vreg               \x43\xE\xB8\xA\x40           ARM32,VFPv2
 
 
 [FMDRRcc]
 [FMDRRcc]
+vreg,reg32,reg32        \x90\xEC\x40\xB\x10          THUMB32,VFPv2
+vreg,reg32,reg32        \x40\xC\x40\xB\x10           ARM32,VFPv2
 
 
 [FMRRDcc]
 [FMRRDcc]
+reg32,reg32,vreg        \x90\xEC\x50\xB\x10          THUMB32,VFPv2
+reg32,reg32,vreg        \x40\xC\x50\xB\x10           ARM32,VFPv2
 
 
 ; Thumb-2
 ; Thumb-2
 
 
@@ -1722,6 +1731,11 @@ reg32              \x3\x01\x2F\xFF\x20             ARM32,ARMv5TEJ
 immshifter           \x61\xDE\x0                   THUMB,ARMv4T
 immshifter           \x61\xDE\x0                   THUMB,ARMv4T
 void                 void                          ARM32,ARMv4T
 void                 void                          ARM32,ARMv4T
 
 
+; NEON/Advanced SIMD
+
+[VEOR]
+vreg,vreg,vreg              \x42\xF3\x00\x01\x10   ARM32,NEON
+
 ; FPA
 ; FPA
 
 
 
 

+ 1 - 1
compiler/arm/armnop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from armins.dat }
 { don't edit, this file is generated from armins.dat }
-952;
+962;

+ 1 - 0
compiler/arm/armop.inc

@@ -324,6 +324,7 @@ A_NEG,
 A_SVC,
 A_SVC,
 A_BXJ,
 A_BXJ,
 A_UDF,
 A_UDF,
+A_VEOR,
 A_TAN,
 A_TAN,
 A_SQT,
 A_SQT,
 A_SUF,
 A_SUF,

+ 70 - 0
compiler/arm/armtab.inc

@@ -1043,6 +1043,13 @@
     code    : #103#152#0#2;
     code    : #103#152#0#2;
     flags   : if_thumb or if_armv4t
     flags   : if_thumb or if_armv4t
   ),
   ),
+  (
+    opcode  : A_LDR;
+    ops     : 2;
+    optypes : (ot_reglo,ot_memoryam2,ot_none,ot_none,ot_none,ot_none);
+    code    : #103#152#0#2;
+    flags   : if_thumb or if_armv4t
+  ),
   (
   (
     opcode  : A_LDR;
     opcode  : A_LDR;
     ops     : 2;
     ops     : 2;
@@ -1351,6 +1358,13 @@
     code    : #18#1#32#240;
     code    : #18#1#32#240;
     flags   : if_arm32 or if_armv4
     flags   : if_arm32 or if_armv4
   ),
   ),
+  (
+    opcode  : A_MSR;
+    ops     : 2;
+    optypes : (ot_regs,ot_reg32,ot_none,ot_none,ot_none,ot_none);
+    code    : #18#1#32#240;
+    flags   : if_arm32 or if_armv4
+  ),
   (
   (
     opcode  : A_MSR;
     opcode  : A_MSR;
     ops     : 2;
     ops     : 2;
@@ -1470,6 +1484,20 @@
     code    : #64#14#176#10#64;
     code    : #64#14#176#10#64;
     flags   : if_arm32 or if_vfpv2
     flags   : if_arm32 or if_vfpv2
   ),
   ),
+  (
+    opcode  : A_VMOV;
+    ops     : 2;
+    optypes : (ot_vreg,ot_immediatemm,ot_none,ot_none,ot_none,ot_none);
+    code    : #144#238#176#10#0;
+    flags   : if_thumb32 or if_vfpv3
+  ),
+  (
+    opcode  : A_VMOV;
+    ops     : 2;
+    optypes : (ot_vreg,ot_immediatemm,ot_none,ot_none,ot_none,ot_none);
+    code    : #64#14#176#10#0;
+    flags   : if_arm32 or if_vfpv3
+  ),
   (
   (
     opcode  : A_VMOV;
     opcode  : A_VMOV;
     ops     : 2;
     ops     : 2;
@@ -1995,6 +2023,13 @@
     code    : #103#144#0#2;
     code    : #103#144#0#2;
     flags   : if_thumb or if_armv4t
     flags   : if_thumb or if_armv4t
   ),
   ),
+  (
+    opcode  : A_STR;
+    ops     : 2;
+    optypes : (ot_reglo,ot_memoryam2,ot_none,ot_none,ot_none,ot_none);
+    code    : #103#144#0#2;
+    flags   : if_thumb or if_armv4t
+  ),
   (
   (
     opcode  : A_STR;
     opcode  : A_STR;
     ops     : 2;
     ops     : 2;
@@ -5439,6 +5474,34 @@
     code    : #67#14#184#10#64;
     code    : #67#14#184#10#64;
     flags   : if_arm32 or if_vfpv2
     flags   : if_arm32 or if_vfpv2
   ),
   ),
+  (
+    opcode  : A_FMDRR;
+    ops     : 3;
+    optypes : (ot_vreg,ot_reg32,ot_reg32,ot_none,ot_none,ot_none);
+    code    : #144#236#64#11#16;
+    flags   : if_thumb32 or if_vfpv2
+  ),
+  (
+    opcode  : A_FMDRR;
+    ops     : 3;
+    optypes : (ot_vreg,ot_reg32,ot_reg32,ot_none,ot_none,ot_none);
+    code    : #64#12#64#11#16;
+    flags   : if_arm32 or if_vfpv2
+  ),
+  (
+    opcode  : A_FMRRD;
+    ops     : 3;
+    optypes : (ot_reg32,ot_reg32,ot_vreg,ot_none,ot_none,ot_none);
+    code    : #144#236#80#11#16;
+    flags   : if_thumb32 or if_vfpv2
+  ),
+  (
+    opcode  : A_FMRRD;
+    ops     : 3;
+    optypes : (ot_reg32,ot_reg32,ot_vreg,ot_none,ot_none,ot_none);
+    code    : #64#12#80#11#16;
+    flags   : if_arm32 or if_vfpv2
+  ),
   (
   (
     opcode  : A_POP;
     opcode  : A_POP;
     ops     : 1;
     ops     : 1;
@@ -6279,6 +6342,13 @@
     code    : #0;
     code    : #0;
     flags   : if_arm32 or if_armv4t
     flags   : if_arm32 or if_armv4t
   ),
   ),
+  (
+    opcode  : A_VEOR;
+    ops     : 3;
+    optypes : (ot_vreg,ot_vreg,ot_vreg,ot_none,ot_none,ot_none);
+    code    : #66#243#0#1#16;
+    flags   : if_arm32 or if_neon
+  ),
   (
   (
     opcode  : A_TAN;
     opcode  : A_TAN;
     ops     : 2;
     ops     : 2;

+ 261 - 136
compiler/arm/cgcpu.pas

@@ -42,7 +42,9 @@ unit cgcpu;
         cgsetflags : boolean;
         cgsetflags : boolean;
 
 
         procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);override;
         procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);override;
-        procedure a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);override;
+       protected
+         procedure a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation); override;
+       public
         procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);override;
         procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);override;
 
 
         procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
         procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
@@ -59,6 +61,8 @@ unit cgcpu;
         procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
         procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
         procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
         procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
 
 
+        procedure g_check_for_fpu_exception(list : TAsmList; force,clear : boolean); override;
+
         procedure a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);override;
         procedure a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);override;
         {  comparison operations }
         {  comparison operations }
         procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
         procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
@@ -107,13 +111,15 @@ unit cgcpu;
         { try to generate optimized 32 Bit multiplication, returns true if successful generated }
         { try to generate optimized 32 Bit multiplication, returns true if successful generated }
         function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
         function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
 
 
-        { clear out potential overflow bits from 8 or 16 bit operations  }
-        { the upper 24/16 bits of a register after an operation          }
+        { clear out potential overflow bits from 8 or 16 bit operations
+          the upper 24/16 bits of a register after an operation          }
         procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
         procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
 
 
         { mla for thumb requires that none of the registers is equal to r13/r15, this method ensures this }
         { mla for thumb requires that none of the registers is equal to r13/r15, this method ensures this }
         procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister);
         procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister);
 
 
+
+        procedure g_maybe_tls_init(list : TAsmList); override;
       end;
       end;
 
 
       { tcgarm is shared between normal arm and thumb-2 }
       { tcgarm is shared between normal arm and thumb-2 }
@@ -241,6 +247,10 @@ unit cgcpu;
        procinfo,cpupi,
        procinfo,cpupi,
        paramgr;
        paramgr;
 
 
+{ Range check must be disabled explicitly as conversions between signed and unsigned
+  32-bit values are done without explicit typecasts }
+{$R-}
+
 
 
     function get_fpu_postfix(def : tdef) : toppostfix;
     function get_fpu_postfix(def : tdef) : toppostfix;
       begin
       begin
@@ -290,7 +300,7 @@ unit cgcpu;
           non-overlapping subregs per register, so we can only use
           non-overlapping subregs per register, so we can only use
           half the single precision registers for now (as sub registers of the
           half the single precision registers for now (as sub registers of the
           double precision ones). }
           double precision ones). }
-        if current_settings.fputype in [fpu_vfpv3,fpu_vfpv4] then
+        if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
           rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
           rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
               [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
               [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
                RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
                RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
@@ -405,7 +415,7 @@ unit cgcpu;
                      begin
                      begin
                        tmpreg2:=getintregister(list,OS_INT);
                        tmpreg2:=getintregister(list,OS_INT);
                        a_loadaddr_ref_reg(list,ref,tmpreg2);
                        a_loadaddr_ref_reg(list,ref,tmpreg2);
-                       reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment,ref.volatility);
+                       reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                      end
                      end
                    else
                    else
                      usedtmpref:=ref;
                      usedtmpref:=ref;
@@ -437,7 +447,7 @@ unit cgcpu;
                      begin
                      begin
                        tmpreg2:=getintregister(list,OS_INT);
                        tmpreg2:=getintregister(list,OS_INT);
                        a_loadaddr_ref_reg(list,ref,tmpreg2);
                        a_loadaddr_ref_reg(list,ref,tmpreg2);
-                       reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment,ref.volatility);
+                       reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                      end
                      end
                    else
                    else
                      usedtmpref:=ref;
                      usedtmpref:=ref;
@@ -526,7 +536,7 @@ unit cgcpu;
                   begin
                   begin
                     { offset in the wrapper needs to be adjusted for the stored
                     { offset in the wrapper needs to be adjusted for the stored
                       return address }
                       return address }
-                    reference_reset_base(href,reference.index,reference.offset+sizeof(aint),sizeof(pint),[]);
+                    reference_reset_base(href,reference.index,reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                     if is_shifter_const(ioffset,shift) then
                     if is_shifter_const(ioffset,shift) then
                       a_op_const_ref(list,OP_SUB,size,ioffset,href)
                       a_op_const_ref(list,OP_SUB,size,ioffset,href)
                     else
                     else
@@ -565,52 +575,16 @@ unit cgcpu;
       end;
       end;
 
 
 
 
-    procedure tbasecgarm.a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);
-      var
-        tmpref, ref: treference;
-        location: pcgparalocation;
-        sizeleft: aint;
+    procedure tbasecgarm.a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation);
       begin
       begin
-        location := paraloc.location;
-        tmpref := r;
-        sizeleft := paraloc.intsize;
-        while assigned(location) do
+        { doubles in softemu mode have a strange order of registers and references }
+        if (cgpara.size=OS_F64) and
+           (location^.size=OS_32) then
           begin
           begin
-            paramanager.allocparaloc(list,location);
-            case location^.loc of
-              LOC_REGISTER,LOC_CREGISTER:
-                a_load_ref_reg(list,location^.size,location^.size,tmpref,location^.register);
-              LOC_REFERENCE:
-                begin
-                  reference_reset_base(ref,location^.reference.index,location^.reference.offset,paraloc.alignment,[]);
-                  { doubles in softemu mode have a strange order of registers and references }
-                  if location^.size=OS_32 then
-                    g_concatcopy(list,tmpref,ref,4)
-                  else
-                    begin
-                      g_concatcopy(list,tmpref,ref,sizeleft);
-                      if assigned(location^.next) then
-                        internalerror(2005010710);
-                    end;
-                end;
-              LOC_FPUREGISTER,LOC_CFPUREGISTER:
-                case location^.size of
-                   OS_F32, OS_F64:
-                     a_loadfpu_ref_reg(list,location^.size,location^.size,tmpref,location^.register);
-                   else
-                     internalerror(2002072801);
-                end;
-              LOC_VOID:
-                begin
-                  // nothing to do
-                end;
-              else
-                internalerror(2002081103);
-            end;
-            inc(tmpref.offset,tcgsize2size[location^.size]);
-            dec(sizeleft,tcgsize2size[location^.size]);
-            location := location^.next;
-          end;
+            g_concatcopy(list,ref,paralocref,4)
+          end
+        else
+          inherited;
       end;
       end;
 
 
 
 
@@ -645,15 +619,9 @@ unit cgcpu;
         r : treference;
         r : treference;
         sym : TAsmSymbol;
         sym : TAsmSymbol;
       begin
       begin
-        { check not really correct: should only be used for non-Thumb cpus }
-        // if (CPUARM_HAS_BLX_LABEL in cpu_capabilities[current_settings.cputype]) and
-        //   { WinCE GNU AS (not sure if this applies in general) does not support BLX imm }
-        // (target_info.system<>system_arm_wince) then
-        //   branchopcode:=A_BLX
-        // else
         { use always BL as newer binutils do not translate blx apparently
         { use always BL as newer binutils do not translate blx apparently
           generating BL is also what clang and gcc do by default }
           generating BL is also what clang and gcc do by default }
-          branchopcode:=A_BL;
+        branchopcode:=A_BL;
         if not(weak) then
         if not(weak) then
           sym:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION)
           sym:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION)
         else
         else
@@ -932,9 +900,11 @@ unit cgcpu;
               a_load_const_reg(list, size, a, dst);
               a_load_const_reg(list, size, a, dst);
               exit;
               exit;
             end;
             end;
+          else
+            ;
         end;
         end;
         ovloc.loc:=LOC_VOID;
         ovloc.loc:=LOC_VOID;
-        if {$ifopt R+}(a<>-2147483648) and{$endif} not setflags and is_shifter_const(-a,shift) then
+        if (a<>-2147483648) and not setflags and is_shifter_const(-a,shift) then
           case op of
           case op of
             OP_ADD:
             OP_ADD:
               begin
               begin
@@ -946,6 +916,8 @@ unit cgcpu;
                 op:=OP_ADD;
                 op:=OP_ADD;
                 a:=aint(dword(-a));
                 a:=aint(dword(-a));
               end
               end
+            else
+              ;
           end;
           end;
 
 
         if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
         if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
@@ -994,6 +966,8 @@ unit cgcpu;
                       ovloc.resflags:=F_CS;
                       ovloc.resflags:=F_CS;
                     OP_SUB:
                     OP_SUB:
                       ovloc.resflags:=F_CC;
                       ovloc.resflags:=F_CC;
+                    else
+                      internalerror(2019050922);
                   end;
                   end;
                 end;
                 end;
           end
           end
@@ -1037,7 +1011,7 @@ unit cgcpu;
             { Doing two shifts instead of two bics might allow the peephole optimizer to fold the second shift
             { Doing two shifts instead of two bics might allow the peephole optimizer to fold the second shift
               into the following instruction}
               into the following instruction}
             else if (op = OP_AND) and
             else if (op = OP_AND) and
-                    is_continuous_mask(a, lsb, width) and
+                    is_continuous_mask(aword(a), lsb, width) and
                     ((lsb = 0) or ((lsb + width) = 32)) then
                     ((lsb = 0) or ((lsb + width) = 32)) then
               begin
               begin
                 shifterop_reset(so);
                 shifterop_reset(so);
@@ -1689,7 +1663,7 @@ unit cgcpu;
                 end;
                 end;
               LOC_REFERENCE :
               LOC_REFERENCE :
                 begin
                 begin
-                  reference_reset_base(href2,hloc^.reference.index,hloc^.reference.offset,paraloc.alignment,[]);
+                  reference_reset_base(href2,hloc^.reference.index,hloc^.reference.offset,ctempposinvalid,paraloc.alignment,[]);
                   { concatcopy should choose the best way to copy the data }
                   { concatcopy should choose the best way to copy the data }
                   g_concatcopy(list,href,href2,tcgsize2size[hloc^.size]);
                   g_concatcopy(list,href,href2,tcgsize2size[hloc^.size]);
                 end;
                 end;
@@ -1748,6 +1722,34 @@ unit cgcpu;
        end;
        end;
 
 
 
 
+    procedure tbasecgarm.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
+      var
+        r : TRegister;
+        ai: taicpu;
+        l: TAsmLabel;
+      begin
+        if ((cs_check_fpu_exceptions in current_settings.localswitches) and
+            not(FPUARM_HAS_EXCEPTION_TRAPPING in fpu_capabilities[current_settings.fputype]) and
+            (force or current_procinfo.FPUExceptionCheckNeeded)) then
+          begin
+            r:=getintregister(list,OS_INT);
+            list.concat(taicpu.op_reg_reg(A_FMRX,r,NR_FPSCR));
+            list.concat(setoppostfix(taicpu.op_reg_reg_const(A_AND,r,r,$9f),PF_S));
+            current_asmdata.getjumplabel(l);
+            ai:=taicpu.op_sym(A_B,l);
+            ai.is_jmp:=true;
+            ai.condition:=C_EQ;
+            list.concat(ai);
+            alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+            cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
+            dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+            a_label(list,l);
+            if clear then
+              current_procinfo.FPUExceptionCheckNeeded:=false;
+          end;
+      end;
+
+
     {  comparison operations }
     {  comparison operations }
     procedure tbasecgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
     procedure tbasecgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
       l : tasmlabel);
       l : tasmlabel);
@@ -1905,6 +1907,10 @@ unit cgcpu;
             firstfloatreg:=RS_NO;
             firstfloatreg:=RS_NO;
             mmregs:=[];
             mmregs:=[];
             case current_settings.fputype of
             case current_settings.fputype of
+              fpu_none,
+              fpu_soft,
+              fpu_libgcc:
+                ;
               fpu_fpa,
               fpu_fpa,
               fpu_fpa10,
               fpu_fpa10,
               fpu_fpa11:
               fpu_fpa11:
@@ -1920,16 +1926,22 @@ unit cgcpu;
                         inc(registerarea,12);
                         inc(registerarea,12);
                       end;
                       end;
                 end;
                 end;
-              fpu_vfpv2,
-              fpu_vfpv3,
-              fpu_vfpv4,
-              fpu_vfpv3_d16:
+              else if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
                 begin;
                 begin;
                   { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
                   { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
                     they have numbers>$1f which is not really correct as they should simply have the same numbers
                     they have numbers>$1f which is not really correct as they should simply have the same numbers
                     as the even ones by with a different subtype as it is done on x86 with al/ah }
                     as the even ones by with a different subtype as it is done on x86 with al/ah }
                   mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
                   mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
-                end;
+                end
+              else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
+                begin;
+                  { the *[0..15] is a hack to prevent that the compiler tries to save odd single-type registers,
+                    they have numbers>$1f which is not really correct as they should simply have the same numbers
+                    as the even ones by with a different subtype as it is done on x86 with al/ah }
+                  mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..15];
+                end
+              else
+                internalerror(2019050924);
             end;
             end;
             a_reg_alloc(list,NR_STACK_POINTER_REG);
             a_reg_alloc(list,NR_STACK_POINTER_REG);
             if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
             if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
@@ -2072,7 +2084,7 @@ unit cgcpu;
              begin
              begin
                reference_reset(ref,4,[]);
                reference_reset(ref,4,[]);
                if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
                if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
-                  (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16]) then
+                 (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) then
                  begin
                  begin
                    if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                    if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                      begin
                      begin
@@ -2099,10 +2111,7 @@ unit cgcpu;
                      list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                      list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                        lastfloatreg-firstfloatreg+1,ref));
                        lastfloatreg-firstfloatreg+1,ref));
                    end;
                    end;
-                 fpu_vfpv2,
-                 fpu_vfpv3,
-                 fpu_vfpv4,
-                 fpu_vfpv3_d16:
+                 else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                    begin
                    begin
                      ref.index:=ref.base;
                      ref.index:=ref.base;
                      ref.base:=NR_NO;
                      ref.base:=NR_NO;
@@ -2113,10 +2122,12 @@ unit cgcpu;
                        postfix:=PF_IAD;}
                        postfix:=PF_IAD;}
                      if mmregs<>[] then
                      if mmregs<>[] then
                        list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                        list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
-                   end;
+                   end
+                 else
+                   internalerror(2019050923);
                end;
                end;
              end;
              end;
-        end;
+          end;
       end;
       end;
 
 
 
 
@@ -2143,6 +2154,10 @@ unit cgcpu;
             mmregs:=[];
             mmregs:=[];
             saveregs:=[];
             saveregs:=[];
             case current_settings.fputype of
             case current_settings.fputype of
+              fpu_none,
+              fpu_soft,
+              fpu_libgcc:
+                ;
               fpu_fpa,
               fpu_fpa,
               fpu_fpa10,
               fpu_fpa10,
               fpu_fpa11:
               fpu_fpa11:
@@ -2161,17 +2176,16 @@ unit cgcpu;
                         }
                         }
                       end;
                       end;
                 end;
                 end;
-              fpu_vfpv2,
-              fpu_vfpv3,
-              fpu_vfpv4,
-              fpu_vfpv3_d16:
-                begin;
+              else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
+                begin
                   { restore vfp registers? }
                   { restore vfp registers? }
                   { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
                   { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
                     they have numbers>$1f which is not really correct as they should simply have the same numbers
                     they have numbers>$1f which is not really correct as they should simply have the same numbers
                     as the even ones but with a different subtype as it is done on x86 with al/ah }
                     as the even ones but with a different subtype as it is done on x86 with al/ah }
                   mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
                   mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
-                end;
+                end
+              else
+                internalerror(2019050926);
             end;
             end;
 
 
             if (firstfloatreg<>RS_NO) or
             if (firstfloatreg<>RS_NO) or
@@ -2179,7 +2193,7 @@ unit cgcpu;
               begin
               begin
                 reference_reset(ref,4,[]);
                 reference_reset(ref,4,[]);
                 if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
                 if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
-                   (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16]) then
+                   (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) then
                   begin
                   begin
                     if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                     if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                       begin
                       begin
@@ -2205,10 +2219,7 @@ unit cgcpu;
                       list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                       list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                         lastfloatreg-firstfloatreg+1,ref));
                         lastfloatreg-firstfloatreg+1,ref));
                     end;
                     end;
-                  fpu_vfpv2,
-                  fpu_vfpv3,
-                  fpu_vfpv4,
-                  fpu_vfpv3_d16:
+                  else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                     begin
                     begin
                       ref.index:=ref.base;
                       ref.index:=ref.base;
                       ref.base:=NR_NO;
                       ref.base:=NR_NO;
@@ -2219,7 +2230,9 @@ unit cgcpu;
                         mmpostfix:=PF_IAD;}
                         mmpostfix:=PF_IAD;}
                      if mmregs<>[] then
                      if mmregs<>[] then
                        list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                        list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
-                    end;
+                    end
+                  else
+                    internalerror(2019050921);
                 end;
                 end;
               end;
               end;
 
 
@@ -2478,6 +2491,19 @@ unit cgcpu;
                     a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
                     a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
                 indirection_done:=true;
                 indirection_done:=true;
               end
               end
+            else if ref.refaddr=addr_gottpoff then
+              current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
+            else if ref.refaddr=addr_tlsgd then
+              current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset))
+            else if ref.refaddr=addr_tlsdesc then
+              current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset))
+            else if ref.refaddr=addr_tpoff then
+              begin
+                if assigned(ref.relsymbol) or (ref.offset<>0) then
+                  Internalerror(2019092804);
+
+                current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
+              end
             else if (cs_create_pic in current_settings.moduleswitches) then
             else if (cs_create_pic in current_settings.moduleswitches) then
               if (tf_pic_uses_got in target_info.flags) then
               if (tf_pic_uses_got in target_info.flags) then
                 current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
                 current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
@@ -2518,6 +2544,11 @@ unit cgcpu;
                (tf_pic_uses_got in target_info.flags) and
                (tf_pic_uses_got in target_info.flags) and
                assigned(ref.symbol) then
                assigned(ref.symbol) then
               begin
               begin
+                {$ifdef EXTDEBUG}
+                if not (pi_needs_got in current_procinfo.flags) then
+                	Comment(V_warning,'pi_needs_got not included');
+                {$endif EXTDEBUG}
+                Include(current_procinfo.flags,pi_needs_got);
                 reference_reset(tmpref,4,[]);
                 reference_reset(tmpref,4,[]);
                 tmpref.base:=current_procinfo.got;
                 tmpref.base:=current_procinfo.got;
                 tmpref.index:=tmpreg;
                 tmpref.index:=tmpreg;
@@ -2580,9 +2611,9 @@ unit cgcpu;
         paraloc1.init;
         paraloc1.init;
         paraloc2.init;
         paraloc2.init;
         paraloc3.init;
         paraloc3.init;
-        paramanager.getintparaloc(list,pd,1,paraloc1);
-        paramanager.getintparaloc(list,pd,2,paraloc2);
-        paramanager.getintparaloc(list,pd,3,paraloc3);
+        paramanager.getcgtempparaloc(list,pd,1,paraloc1);
+        paramanager.getcgtempparaloc(list,pd,2,paraloc2);
+        paramanager.getcgtempparaloc(list,pd,3,paraloc3);
         a_load_const_cgpara(list,OS_SINT,len,paraloc3);
         a_load_const_cgpara(list,OS_SINT,len,paraloc3);
         a_loadaddr_ref_cgpara(list,dest,paraloc2);
         a_loadaddr_ref_cgpara(list,dest,paraloc2);
         a_loadaddr_ref_cgpara(list,source,paraloc1);
         a_loadaddr_ref_cgpara(list,source,paraloc1);
@@ -2684,6 +2715,21 @@ unit cgcpu;
           list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
           list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
         end;
         end;
 
 
+      { Conservative estimate of whether the original reference can be used
+        directly while copying (result is true) or whether its address has to
+        be loaded into a separate register first (result is false).
+        The result is true in two cases:
+        - ref.base is NR_PC, the addressing mode is AM_OFFSET and ref.refaddr
+          is addr_full or addr_no;
+        - ref has no symbol, the addressing mode is AM_OFFSET and both
+          ref.offset and ref.offset+len lie inside one of the offset windows
+          tested below (0..31 always; -255..255 and -4095..4095 only when not
+          generating Thumb code).
+        len is not a parameter of this function; it is read from the
+        surrounding scope (presumably the byte count of the enclosing copy
+        routine - confirm against the caller). }
+      function SimpleRef(const ref : treference) : boolean;
+        begin
+          result:=((ref.base=NR_PC) and (ref.addressmode=AM_OFFSET) and (ref.refaddr in [addr_full,addr_no])) or
+              ((ref.symbol=nil) and
+               (ref.addressmode=AM_OFFSET) and
+               (((ref.offset>=0) and (ref.offset+len<=31)) or
+                (not(GenerateThumbCode) and (ref.offset>=-255) and (ref.offset+len<=255)) or
+                { ldrh has a limited offset range, so the wide -4095..4095 window is
+                  only accepted when len mod 4 is 0 or 1 (presumably because no
+                  halfword access is needed for such lengths - TODO confirm) }
+                (not(GenerateThumbCode) and ((len mod 4) in [0,1]) and (ref.offset>=-4095) and (ref.offset+len<=4095))
+               )
+              );
+        end;
+
       { will never be called with count<=4 }
       { will never be called with count<=4 }
       procedure genloop_thumb(count : aword;size : byte);
       procedure genloop_thumb(count : aword;size : byte);
 
 
@@ -2790,19 +2836,17 @@ unit cgcpu;
           begin
           begin
             tmpregi:=0;
             tmpregi:=0;
 
 
-            srcreg:=getintregister(list,OS_ADDR);
-
-            { explicit pc relative addressing, could be
-              e.g. a floating point constant }
-            if source.base=NR_PC then
+            { loading address in a separate register needed? }
+            if SimpleRef(source) then
               begin
               begin
                 { ... then we don't need a loadaddr }
                 { ... then we don't need a loadaddr }
                 srcref:=source;
                 srcref:=source;
               end
               end
             else
             else
               begin
               begin
+                srcreg:=getintregister(list,OS_ADDR);
                 a_loadaddr_ref_reg(list,source,srcreg);
                 a_loadaddr_ref_reg(list,source,srcreg);
-                reference_reset_base(srcref,srcreg,0,source.alignment,source.volatility);
+                reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);
               end;
               end;
 
 
             while (len div 4 <> 0) and (tmpregi<maxtmpreg) do
             while (len div 4 <> 0) and (tmpregi<maxtmpreg) do
@@ -2814,9 +2858,15 @@ unit cgcpu;
                 dec(len,4);
                 dec(len,4);
               end;
               end;
 
 
-            destreg:=getintregister(list,OS_ADDR);
-            a_loadaddr_ref_reg(list,dest,destreg);
-            reference_reset_base(dstref,destreg,0,dest.alignment,dest.volatility);
+            { loading address in a separate register needed? }
+            if SimpleRef(dest) then
+              dstref:=dest
+            else
+              begin
+                destreg:=getintregister(list,OS_ADDR);
+                a_loadaddr_ref_reg(list,dest,destreg);
+                reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
+              end;
             tmpregi2:=1;
             tmpregi2:=1;
             while (tmpregi2<=tmpregi) do
             while (tmpregi2<=tmpregi) do
               begin
               begin
@@ -2884,11 +2934,11 @@ unit cgcpu;
               begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
               begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
                 destreg:=getintregister(list,OS_ADDR);
                 destreg:=getintregister(list,OS_ADDR);
                 a_loadaddr_ref_reg(list,dest,destreg);
                 a_loadaddr_ref_reg(list,dest,destreg);
-                reference_reset_base(dstref,destreg,0,dest.alignment,dest.volatility);
+                reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
 
 
                 srcreg:=getintregister(list,OS_ADDR);
                 srcreg:=getintregister(list,OS_ADDR);
                 a_loadaddr_ref_reg(list,source,srcreg);
                 a_loadaddr_ref_reg(list,source,srcreg);
-                reference_reset_base(srcref,srcreg,0,source.alignment,source.volatility);
+                reference_reset_base(srcref,srcreg,0,dest.temppos,source.alignment,source.volatility);
 
 
                 countreg:=getintregister(list,OS_32);
                 countreg:=getintregister(list,OS_32);
 
 
@@ -2948,7 +2998,7 @@ unit cgcpu;
               if not((def.typ=pointerdef) or
               if not((def.typ=pointerdef) or
                     ((def.typ=orddef) and
                     ((def.typ=orddef) and
                      (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
                      (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
-                                               pasbool8,pasbool16,pasbool32,pasbool64]))) then
+                                               pasbool1,pasbool8,pasbool16,pasbool32,pasbool64]))) then
                  ai.SetCondition(C_VC)
                  ai.SetCondition(C_VC)
               else
               else
                 if TAiCpu(List.Last).opcode in [A_RSB,A_RSC,A_SBC,A_SUB] then
                 if TAiCpu(List.Last).opcode in [A_RSB,A_RSC,A_SBC,A_SUB] then
@@ -3051,7 +3101,11 @@ unit cgcpu;
         list.concat(instr);
         list.concat(instr);
         case instr.opcode of
         case instr.opcode of
           A_VMOV:
           A_VMOV:
+            { VMOV cannot generate an FPU exception, so we do not need a check here }
             add_move_instruction(instr);
             add_move_instruction(instr);
+          else
+            { VCVT can generate an exception }
+            maybe_check_for_fpu_exception(list);
         end;
         end;
       end;
       end;
 
 
@@ -3081,6 +3135,10 @@ unit cgcpu;
               if (fromsize<>tosize) then
               if (fromsize<>tosize) then
                 internalerror(2009112901);
                 internalerror(2009112901);
             end;
             end;
+          OS_F32,OS_F64:
+            ;
+          else
+            internalerror(2019050920);
         end;
         end;
 
 
         if (fromsize<>tosize) then
         if (fromsize<>tosize) then
@@ -3108,9 +3166,7 @@ unit cgcpu;
             end;
             end;
           end
           end
         else
         else
-          begin
-             handle_load_store(list,A_VLDR,PF_None,tmpmmreg,ref);
-          end;
+          handle_load_store(list,A_VLDR,PF_None,tmpmmreg,ref);
 
 
         if (tmpmmreg<>reg) then
         if (tmpmmreg<>reg) then
           a_loadmm_reg_reg(list,fromsize,tosize,tmpmmreg,reg,shuffle);
           a_loadmm_reg_reg(list,fromsize,tosize,tmpmmreg,reg,shuffle);
@@ -3142,6 +3198,10 @@ unit cgcpu;
               if (fromsize<>tosize) then
               if (fromsize<>tosize) then
                 internalerror(2009112901);
                 internalerror(2009112901);
             end;
             end;
+          OS_F32,OS_F64:
+            ;
+          else
+            internalerror(2019050919);
         end;
         end;
 
 
         if (fromsize<>tosize) then
         if (fromsize<>tosize) then
@@ -3172,9 +3232,8 @@ unit cgcpu;
             end;
             end;
           end
           end
         else
         else
-          begin
-             handle_load_store(list,A_VSTR,PF_None,tmpmmreg,ref);
-          end;
+          handle_load_store(list,A_VSTR,PF_None,tmpmmreg,ref);
+        { VSTR cannot generate an FPU exception, VCVT is handled separately, so we do not need a check here }
       end;
       end;
 
 
 
 
@@ -3190,6 +3249,7 @@ unit cgcpu;
            not shufflescalar(shuffle) then
            not shufflescalar(shuffle) then
           internalerror(2009112516);
           internalerror(2009112516);
         list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
         list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
+        { VMOV cannot generate an FPU exception, so we do not need a check here }
       end;
       end;
 
 
 
 
@@ -3205,6 +3265,7 @@ unit cgcpu;
            not shufflescalar(shuffle) then
            not shufflescalar(shuffle) then
           internalerror(2009112514);
           internalerror(2009112514);
         list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
         list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
+        { VMOV cannot generate an FPU exception, so we do not need a check here }
       end;
       end;
 
 
 
 
@@ -3218,20 +3279,30 @@ unit cgcpu;
           case op of
           case op of
             OP_XOR:
             OP_XOR:
               begin
               begin
-                if (src<>dst) or
-                   (reg_cgsize(src)<>size) or
-                   assigned(shuffle) then
-                  internalerror(2009112907);
-                tmpreg:=getintregister(list,OS_32);
-                a_load_const_reg(list,OS_32,0,tmpreg);
-                case size of
-                  OS_F32:
-                    list.concat(taicpu.op_reg_reg(A_VMOV,dst,tmpreg));
-                  OS_F64:
-                    list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,tmpreg,tmpreg));
-                  else
-                    internalerror(2009112908);
-                end;
+                if (FPUARM_HAS_NEON in fpu_capabilities[current_settings.fputype]) and (size in [OS_F64]) then
+                  begin
+                    if (reg_cgsize(src)<>size) or
+                       assigned(shuffle) then
+                      internalerror(2019081301);
+                    list.concat(taicpu.op_reg_reg_reg(A_VEOR,dst,dst,src));
+                  end
+                else
+                  begin
+                    if (src<>dst) or
+                       (reg_cgsize(src)<>size) or
+                       assigned(shuffle) then
+                      internalerror(2009112907);
+                    tmpreg:=getintregister(list,OS_32);
+                    a_load_const_reg(list,OS_32,0,tmpreg);
+                    case size of
+                      OS_F32:
+                        list.concat(taicpu.op_reg_reg(A_VMOV,dst,tmpreg));
+                      OS_F64:
+                        list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,tmpreg,tmpreg));
+                      else
+                        internalerror(2009112908);
+                    end;
+                  end;
               end
               end
             else
             else
               internalerror(2009112906);
               internalerror(2009112906);
@@ -3273,6 +3344,18 @@ unit cgcpu;
       end;
       end;
 
 
 
 
+    procedure tbasecgarm.g_maybe_tls_init(list : TAsmList); { Appends TLS setup code to list
+        when the current procedure carries the pi_needs_tls flag; appends nothing
+        otherwise. The emitted sequence calls 'fpc_read_tp' and copies R0 into
+        the register stored in current_procinfo.tlsoffset. }
+      begin
+        if pi_needs_tls in current_procinfo.flags then
+          begin
+            list.concat(tai_regalloc.alloc(NR_R0,nil)); { record R0 as allocated around the call }
+            a_call_name(list,'fpc_read_tp',false); { presumably returns the thread pointer in R0 - confirm against the RTL helper }
+            a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,current_procinfo.tlsoffset); { keep the value of R0 in the tlsoffset register }
+            list.concat(tai_regalloc.dealloc(NR_R0,nil)); { R0 is no longer needed once the value has been copied }
+          end;
+      end;
+
+
     procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
     procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
       begin
       begin
         case op of
         case op of
@@ -3323,6 +3406,7 @@ unit cgcpu;
         if (mmsize<>OS_F64) then
         if (mmsize<>OS_F64) then
           internalerror(2009112405);
           internalerror(2009112405);
         list.concat(taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi));
         list.concat(taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi));
+        { VMOV cannot generate an FPU exception, so we do not need a check here }
       end;
       end;
 
 
 
 
@@ -3333,6 +3417,7 @@ unit cgcpu;
         if (mmsize<>OS_F64) then
         if (mmsize<>OS_F64) then
           internalerror(2009112406);
           internalerror(2009112406);
         list.concat(taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg));
         list.concat(taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg));
+        { VMOV cannot generate an FPU exception, so we do not need a check here }
       end;
       end;
 
 
 
 
@@ -3346,6 +3431,8 @@ unit cgcpu;
           OP_NEG,
           OP_NEG,
           OP_NOT :
           OP_NOT :
             internalerror(2012022501);
             internalerror(2012022501);
+          else
+            ;
         end;
         end;
         if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
         if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
           begin
           begin
@@ -3410,6 +3497,8 @@ unit cgcpu;
                     ovloc.resflags:=F_CS;
                     ovloc.resflags:=F_CS;
                   OP_SUB:
                   OP_SUB:
                     ovloc.resflags:=F_CC;
                     ovloc.resflags:=F_CC;
+                  else
+                    internalerror(2019050918);
                 end;
                 end;
               end;
               end;
           end
           end
@@ -3483,6 +3572,8 @@ unit cgcpu;
           OP_NEG,
           OP_NEG,
           OP_NOT :
           OP_NOT :
             internalerror(2012022502);
             internalerror(2012022502);
+          else
+            ;
         end;
         end;
         if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
         if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
           begin
           begin
@@ -3511,6 +3602,8 @@ unit cgcpu;
                     ovloc.resflags:=F_CS;
                     ovloc.resflags:=F_CS;
                   OP_SUB:
                   OP_SUB:
                     ovloc.resflags:=F_CC;
                     ovloc.resflags:=F_CC;
+                  else
+                    internalerror(2019050917);
                 end;
                 end;
               end;
               end;
           end
           end
@@ -3794,7 +3887,7 @@ unit cgcpu;
                      begin
                      begin
                        tmpreg2:=getintregister(list,OS_INT);
                        tmpreg2:=getintregister(list,OS_INT);
                        a_loadaddr_ref_reg(list,ref,tmpreg2);
                        a_loadaddr_ref_reg(list,ref,tmpreg2);
-                       reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment,ref.volatility);
+                       reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                      end
                      end
                    else
                    else
                      usedtmpref:=ref;
                      usedtmpref:=ref;
@@ -3827,7 +3920,7 @@ unit cgcpu;
                      begin
                      begin
                        tmpreg2:=getintregister(list,OS_INT);
                        tmpreg2:=getintregister(list,OS_INT);
                        a_loadaddr_ref_reg(list,ref,tmpreg2);
                        a_loadaddr_ref_reg(list,ref,tmpreg2);
-                       reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment,ref.volatility);
+                       reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                      end
                      end
                    else
                    else
                      usedtmpref:=ref;
                      usedtmpref:=ref;
@@ -3941,7 +4034,7 @@ unit cgcpu;
                   begin
                   begin
                     { offset in the wrapper needs to be adjusted for the stored
                     { offset in the wrapper needs to be adjusted for the stored
                       return address }
                       return address }
-                    reference_reset_base(href,reference.index,reference.offset+sizeof(aint),sizeof(pint),[]);
+                    reference_reset_base(href,reference.index,reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                     if is_thumb_imm(ioffset) then
                     if is_thumb_imm(ioffset) then
                       a_op_const_ref(list,OP_SUB,size,ioffset,href)
                       a_op_const_ref(list,OP_SUB,size,ioffset,href)
                     else
                     else
@@ -4008,7 +4101,7 @@ unit cgcpu;
             tmpreg:=getintregister(list,OS_ADDR);
             tmpreg:=getintregister(list,OS_ADDR);
             a_loadaddr_ref_reg(list,ref,tmpreg);
             a_loadaddr_ref_reg(list,ref,tmpreg);
 
 
-            reference_reset_base(href,tmpreg,0,ref.alignment,ref.volatility);
+            reference_reset_base(href,tmpreg,0,ref.temppos,ref.alignment,ref.volatility);
           end
           end
         else if (op=A_LDR) and
         else if (op=A_LDR) and
            (oppostfix in [PF_None]) and
            (oppostfix in [PF_None]) and
@@ -4018,7 +4111,7 @@ unit cgcpu;
             tmpreg:=getintregister(list,OS_ADDR);
             tmpreg:=getintregister(list,OS_ADDR);
             a_loadaddr_ref_reg(list,ref,tmpreg);
             a_loadaddr_ref_reg(list,ref,tmpreg);
 
 
-            reference_reset_base(href,tmpreg,0,ref.alignment,ref.volatility);
+            reference_reset_base(href,tmpreg,0,ref.temppos,ref.alignment,ref.volatility);
           end
           end
         else if (op=A_LDR) and
         else if (op=A_LDR) and
            ((oppostfix in [PF_SH,PF_SB]) or
            ((oppostfix in [PF_SH,PF_SB]) or
@@ -4027,7 +4120,7 @@ unit cgcpu;
             tmpreg:=getintregister(list,OS_ADDR);
             tmpreg:=getintregister(list,OS_ADDR);
             a_loadaddr_ref_reg(list,ref,tmpreg);
             a_loadaddr_ref_reg(list,ref,tmpreg);
 
 
-            reference_reset_base(href,tmpreg,0,ref.alignment,ref.volatility);
+            reference_reset_base(href,tmpreg,0,ref.temppos,ref.alignment,ref.volatility);
           end;
           end;
 
 
         Result:=inherited handle_load_store(list, op, oppostfix, reg, href);
         Result:=inherited handle_load_store(list, op, oppostfix, reg, href);
@@ -4086,6 +4179,8 @@ unit cgcpu;
                 op:=OP_ADD;
                 op:=OP_ADD;
                 a:=aint(dword(-a));
                 a:=aint(dword(-a));
               end
               end
+            else
+              ;
           end;
           end;
 
 
         if is_thumb_imm(a) and (op in [OP_ADD,OP_SUB]) then
         if is_thumb_imm(a) and (op in [OP_ADD,OP_SUB]) then
@@ -4105,6 +4200,8 @@ unit cgcpu;
                   OP_SUB:
                   OP_SUB:
                     //!!! ovloc.resflags:=F_CC;
                     //!!! ovloc.resflags:=F_CC;
                     ;
                     ;
+                  else
+                    ;
                 end;
                 end;
               end;
               end;
           end
           end
@@ -4231,13 +4328,13 @@ unit cgcpu;
         rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
         rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
             [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);
             [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);
 
 
-        if current_settings.fputype in [fpu_vfpv3,fpu_vfpv4] then
+        if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
           rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
           rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
               [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
               [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
                RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
                RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
                RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
                RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
               ],first_mm_imreg,[])
               ],first_mm_imreg,[])
-        else if current_settings.fputype in [fpu_fpv4_s16,fpu_vfpv3_d16] then
+        else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
           rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
           rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
               [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
               [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
                RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
                RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
@@ -4345,7 +4442,7 @@ unit cgcpu;
                      begin
                      begin
                        tmpreg2:=getintregister(list,OS_INT);
                        tmpreg2:=getintregister(list,OS_INT);
                        a_loadaddr_ref_reg(list,ref,tmpreg2);
                        a_loadaddr_ref_reg(list,ref,tmpreg2);
-                       reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment,ref.volatility);
+                       reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                      end
                      end
                    else
                    else
                      usedtmpref:=ref;
                      usedtmpref:=ref;
@@ -4377,7 +4474,7 @@ unit cgcpu;
                      begin
                      begin
                        tmpreg2:=getintregister(list,OS_INT);
                        tmpreg2:=getintregister(list,OS_INT);
                        a_loadaddr_ref_reg(list,ref,tmpreg2);
                        a_loadaddr_ref_reg(list,ref,tmpreg2);
-                       reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment,ref.volatility);
+                       reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                      end
                      end
                    else
                    else
                      usedtmpref:=ref;
                      usedtmpref:=ref;
@@ -4434,6 +4531,11 @@ unit cgcpu;
               OS_S8: list.concat(taicpu.op_reg_reg(A_SXTB,dst,dst));
               OS_S8: list.concat(taicpu.op_reg_reg(A_SXTB,dst,dst));
               OS_16: list.concat(taicpu.op_reg_reg(A_UXTH,dst,dst));
               OS_16: list.concat(taicpu.op_reg_reg(A_UXTH,dst,dst));
               OS_S16: list.concat(taicpu.op_reg_reg(A_SXTH,dst,dst));
               OS_S16: list.concat(taicpu.op_reg_reg(A_SXTH,dst,dst));
+              OS_32,
+              OS_S32:
+                ;
+              else
+                internalerror(2019050916);
             end;
             end;
           end
           end
         else
         else
@@ -4449,7 +4551,7 @@ unit cgcpu;
         l1 : longint;
         l1 : longint;
       begin
       begin
         ovloc.loc:=LOC_VOID;
         ovloc.loc:=LOC_VOID;
-        if {$ifopt R+}(a<>-2147483648) and{$endif} is_shifter_const(-a,shift) then
+        if (a<>-2147483648) and is_shifter_const(-a,shift) then
           case op of
           case op of
             OP_ADD:
             OP_ADD:
               begin
               begin
@@ -4461,6 +4563,8 @@ unit cgcpu;
                 op:=OP_ADD;
                 op:=OP_ADD;
                 a:=aint(dword(-a));
                 a:=aint(dword(-a));
               end
               end
+            else
+              ;
           end;
           end;
 
 
         if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
         if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
@@ -4565,6 +4669,8 @@ unit cgcpu;
                       ovloc.resflags:=F_CS;
                       ovloc.resflags:=F_CS;
                     OP_SUB:
                     OP_SUB:
                       ovloc.resflags:=F_CC;
                       ovloc.resflags:=F_CC;
+                    else
+                      ;
                   end;
                   end;
                 end;
                 end;
           end
           end
@@ -4622,7 +4728,7 @@ unit cgcpu;
               list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
               list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
             else if (op = OP_AND) and is_thumb32_imm(not(dword(a))) then
             else if (op = OP_AND) and is_thumb32_imm(not(dword(a))) then
               list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
               list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
-            else if (op = OP_AND) and is_continuous_mask(not(a), shift, width) then
+            else if (op = OP_AND) and is_continuous_mask(aword(not(a)), shift, width) then
               begin
               begin
                 a_load_reg_reg(list,size,size,src,dst);
                 a_load_reg_reg(list,size,size,src,dst);
                 list.concat(taicpu.op_reg_const_const(A_BFC,dst,shift,width))
                 list.concat(taicpu.op_reg_const_const(A_BFC,dst,shift,width))
@@ -4953,7 +5059,7 @@ unit cgcpu;
               end;
               end;
 
 
             if regs=[] then
             if regs=[] then
-              list.concat(taicpu.op_reg_reg(A_MOV,NR_R15,NR_R14))
+              list.concat(taicpu.op_reg(A_BX,NR_R14))
             else
             else
               begin
               begin
                 reference_reset(ref,4,[]);
                 reference_reset(ref,4,[]);
@@ -4963,7 +5069,7 @@ unit cgcpu;
               end;
               end;
           end
           end
         else
         else
-          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
+          list.concat(taicpu.op_reg(A_BX,NR_R14));
       end;
       end;
 
 
 
 
@@ -5028,7 +5134,21 @@ unit cgcpu;
                 cg.a_label(current_procinfo.aktlocaldata,l);
                 cg.a_label(current_procinfo.aktlocaldata,l);
                 tmpref.symboldata:=current_procinfo.aktlocaldata.last;
                 tmpref.symboldata:=current_procinfo.aktlocaldata.last;
 
 
-                current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
+                if ref.refaddr=addr_gottpoff then
+                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
+                else if ref.refaddr=addr_tlsgd then
+                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset))
+                else if ref.refaddr=addr_tlsdesc then
+                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset))
+                else if ref.refaddr=addr_tpoff then
+                  begin
+                    if assigned(ref.relsymbol) or (ref.offset<>0) then
+                      Internalerror(2019092805);
+
+                    current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
+                  end
+                else
+                  current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
 
 
                 { load consts entry }
                 { load consts entry }
                 tmpref.symbol:=l;
                 tmpref.symbol:=l;
@@ -5139,6 +5259,7 @@ unit cgcpu;
             instr:=setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32);
             instr:=setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32);
             list.Concat(instr);
             list.Concat(instr);
             add_move_instruction(instr);
             add_move_instruction(instr);
+            { VMOV cannot generate an FPU exception, so we do not need a check here }
           end
           end
         else if (fromsize=OS_F64) and
         else if (fromsize=OS_F64) and
           (tosize=OS_F64) then
           (tosize=OS_F64) then
@@ -5164,6 +5285,7 @@ unit cgcpu;
     procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
     procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
       begin
       begin
         handle_load_store(list,A_VSTR,PF_None,reg,ref);
         handle_load_store(list,A_VSTR,PF_None,reg,ref);
+        { VSTR cannot generate an FPU exception, so we do not need a check here }
       end;
       end;
 
 
 
 
@@ -5181,7 +5303,10 @@ unit cgcpu;
       begin
       begin
         if //(shuffle=nil) and
         if //(shuffle=nil) and
           (fromsize=OS_F32) then
           (fromsize=OS_F32) then
-          list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg))
+          begin
+            list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
+            { VMOV cannot generate an FPU exception, so we do not need a check here }
+          end
         else
         else
           internalerror(2012100814);
           internalerror(2012100814);
       end;
       end;

+ 76 - 20
compiler/arm/cpubase.pas

@@ -113,9 +113,6 @@ unit cpubase;
 
 
       VOLATILE_INTREGISTERS_DARWIN = [RS_R0..RS_R3,RS_R9,RS_R12..RS_R14];
       VOLATILE_INTREGISTERS_DARWIN = [RS_R0..RS_R3,RS_R9,RS_R12..RS_R14];
 
 
-    type
-      totherregisterset = set of tregisterindex;
-
 {*****************************************************************************
 {*****************************************************************************
                           Instruction post fixes
                           Instruction post fixes
 *****************************************************************************}
 *****************************************************************************}
@@ -341,20 +338,6 @@ unit cpubase;
 *****************************************************************************}
 *****************************************************************************}
 
 
     const
     const
-      { Registers which must be saved when calling a routine declared as
-        cppdecl, cdecl, stdcall, safecall, palmossyscall. The registers
-        saved should be the ones as defined in the target ABI and / or GCC.
-
-        This value can be deduced from the CALLED_USED_REGISTERS array in the
-        GCC source.
-      }
-      saved_standard_registers : array[0..6] of tsuperregister =
-        (RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,RS_R9,RS_R10);
-
-      { this is only for the generic code which is not used for this architecture }
-      saved_address_registers : array[0..0] of tsuperregister = (RS_INVALID);
-      saved_mm_registers : array[0..0] of tsuperregister = (RS_INVALID);
-
       { Required parameter alignment when calling a routine declared as
       { Required parameter alignment when calling a routine declared as
         stdcall and cdecl. The alignment value should be the one defined
         stdcall and cdecl. The alignment value should be the one defined
         by GCC or the target ABI.
         by GCC or the target ABI.
@@ -382,6 +365,9 @@ unit cpubase;
     function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function conditions_equal(const c1, c2: TAsmCond): boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function conditions_equal(const c1, c2: TAsmCond): boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
 
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal") }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+
     procedure shifterop_reset(var so : tshifterop); {$ifdef USEINLINE}inline;{$endif USEINLINE}
     procedure shifterop_reset(var so : tshifterop); {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function is_pc(const r : tregister) : boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function is_pc(const r : tregister) : boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
 
@@ -391,8 +377,11 @@ unit cpubase;
       doesn't handle ROR_C detection }
       doesn't handle ROR_C detection }
     function is_thumb32_imm(d : aint) : boolean;
     function is_thumb32_imm(d : aint) : boolean;
     function split_into_shifter_const(value : aint;var imm1: dword; var imm2: dword):boolean;
     function split_into_shifter_const(value : aint;var imm1: dword; var imm2: dword):boolean;
-    function is_continuous_mask(d : aint;var lsb, width: byte) : boolean;
+    function is_continuous_mask(d : aword;var lsb, width: byte) : boolean;
     function dwarf_reg(r:tregister):shortint;
     function dwarf_reg(r:tregister):shortint;
+    function dwarf_reg_no_error(r:tregister):shortint;
+    function eh_return_data_regno(nr: longint): longint;
+
 
 
     function IsIT(op: TAsmOp) : boolean;
     function IsIT(op: TAsmOp) : boolean;
     function GetITLevels(op: TAsmOp) : longint;
     function GetITLevels(op: TAsmOp) : longint;
@@ -401,6 +390,8 @@ unit cpubase;
     function GenerateThumbCode : boolean;
     function GenerateThumbCode : boolean;
     function GenerateThumb2Code : boolean;
     function GenerateThumb2Code : boolean;
 
 
+    function IsVFPFloatImmediate(ft : tfloattype;value : bestreal) : boolean;
+
   implementation
   implementation
 
 
     uses
     uses
@@ -427,8 +418,11 @@ unit cpubase;
           R_MMREGISTER:
           R_MMREGISTER:
             begin
             begin
               case s of
               case s of
+                { records passed in MM registers }
+                OS_32,
                 OS_F32:
                 OS_F32:
                   cgsize2subreg:=R_SUBFS;
                   cgsize2subreg:=R_SUBFS;
+                OS_64,
                 OS_F64:
                 OS_F64:
                   cgsize2subreg:=R_SUBFD;
                   cgsize2subreg:=R_SUBFD;
                 else
                 else
@@ -549,6 +543,26 @@ unit cpubase;
       end;
       end;
 
 
 
 
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal") }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+      begin
+        Result := (c = C_None) or conditions_equal(Subset, c);
+
+        { Please update as necessary. [Kit] }
+        if not Result then
+          case Subset of
+            C_EQ:
+              Result := (c in [C_GE, C_LE]);
+            C_LT:
+              Result := (c in [C_LE]);
+            C_GT:
+              Result := (c in [C_GE]);
+            else
+              Result := False;
+          end;
+      end;
+
+
     function is_shifter_const(d : aint;var imm_shift : byte) : boolean;
     function is_shifter_const(d : aint;var imm_shift : byte) : boolean;
       var
       var
          i : longint;
          i : longint;
@@ -621,7 +635,7 @@ unit cpubase;
           end;
           end;
       end;
       end;
     
     
-    function is_continuous_mask(d : aint;var lsb, width: byte) : boolean;
+    function is_continuous_mask(d : aword;var lsb, width: byte) : boolean;
       var
       var
         msb : byte;
         msb : byte;
       begin
       begin
@@ -630,7 +644,7 @@ unit cpubase;
         
         
         width:=msb-lsb+1;
         width:=msb-lsb+1;
         
         
-        result:=(lsb<>255) and (msb<>255) and ((((1 shl (msb-lsb+1))-1) shl lsb) = d);
+        result:=(lsb<>255) and (msb<>255) and (aword(((1 shl (msb-lsb+1))-1) shl lsb) = d);
       end;
       end;
 
 
 
 
@@ -666,6 +680,19 @@ unit cpubase;
           internalerror(200603251);
           internalerror(200603251);
       end;
       end;
 
 
+    function dwarf_reg_no_error(r:tregister):shortint;
+      begin
+        result:=regdwarf_table[findreg_by_number(r)];
+      end;
+
+    function eh_return_data_regno(nr: longint): longint;
+      begin
+        if (nr>=0) and (nr<2) then
+          result:=nr
+        else
+          result:=-1;
+      end;
+
       { Low part of 64bit return value }
       { Low part of 64bit return value }
     function NR_FUNCTION_RESULT64_LOW_REG: tregister; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function NR_FUNCTION_RESULT64_LOW_REG: tregister; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     begin
     begin
@@ -750,5 +777,34 @@ unit cpubase;
       end;
       end;
 
 
 
 
+    function IsVFPFloatImmediate(ft : tfloattype;value : bestreal) : boolean;
+      var
+        singlerec : tcompsinglerec;
+        doublerec : tcompdoublerec;
+      begin
+        Result:=false;
+        case ft of
+          s32real:
+            begin
+              singlerec.value:=value;
+              singlerec:=tcompsinglerec(NtoLE(DWord(singlerec)));
+              Result:=(singlerec.bytes[0]=0) and (singlerec.bytes[1]=0) and ((singlerec.bytes[2] and 7)=0)  and
+                (((singlerec.bytes[3] and $7e)=$40) or ((singlerec.bytes[3] and $7e)=$3e));
+            end;
+          s64real:
+            begin
+              doublerec.value:=value;
+              doublerec:=tcompdoublerec(NtoLE(QWord(doublerec)));
+              Result:=(doublerec.bytes[0]=0) and (doublerec.bytes[1]=0) and (doublerec.bytes[2]=0) and
+                      (doublerec.bytes[3]=0) and (doublerec.bytes[4]=0) and (doublerec.bytes[5]=0) and
+                      ((((doublerec.bytes[6] and $c0)=$0) and ((doublerec.bytes[7] and $7f)=$40)) or
+                       (((doublerec.bytes[6] and $c0)=$c0) and ((doublerec.bytes[7] and $7f)=$3f)));
+            end;
+          else
+            ;
+        end;
+      end;
+
+
 end.
 end.
 
 

+ 23 - 5
compiler/arm/cpuelf.pas

@@ -28,7 +28,7 @@ interface
 implementation
 implementation
 
 
   uses
   uses
-    globtype,cutils,cclasses,
+    globtype,globals,cutils,cclasses,
     verbose, elfbase,
     verbose, elfbase,
     systems,aasmbase,ogbase,ogelf,assemble;
     systems,aasmbase,ogbase,ogelf,assemble;
 
 
@@ -335,9 +335,24 @@ implementation
           result:=R_ARM_THM_CALL;
           result:=R_ARM_THM_CALL;
         RELOC_GOT32:
         RELOC_GOT32:
           result:=R_ARM_GOT_BREL;
           result:=R_ARM_GOT_BREL;
+        RELOC_TPOFF:
+          if current_settings.tlsmodel=tlsm_initial_exec then
+            result:=R_ARM_TLS_IE32
+          else if current_settings.tlsmodel=tlsm_local_exec then
+            result:=R_ARM_TLS_LE32
+          else
+            Internalerror(2019092901);
+        RELOC_TLSGD:
+          result:=R_ARM_TLS_GD32;
+        RELOC_TLSDESC:
+          result:=R_ARM_TLS_GOTDESC;
+        RELOC_TLS_CALL:
+          result:=R_ARM_TLS_CALL;
+        RELOC_ARM_CALL:
+          result:=R_ARM_CALL;
+        RELOC_DTPOFF:
+          result:=R_ARM_TLS_LDO32;
       else
       else
-        result:=0;
-        writeln(objrel.typ);
         InternalError(2012110602);
         InternalError(2012110602);
       end;
       end;
     end;
     end;
@@ -588,6 +603,8 @@ implementation
               data.Write(zero,4);
               data.Write(zero,4);
               continue;
               continue;
             end;
             end;
+          else
+            ;
         end;
         end;
 
 
         if (objreloc.flags and rf_raw)=0 then
         if (objreloc.flags and rf_raw)=0 then
@@ -955,8 +972,9 @@ implementation
          asmbin : '';
          asmbin : '';
          asmcmd : '';
          asmcmd : '';
          supported_targets : [system_arm_embedded,system_arm_darwin,
          supported_targets : [system_arm_embedded,system_arm_darwin,
-                              system_arm_linux,system_arm_gba,
-                              system_arm_nds,system_arm_aros];
+                              system_arm_linux,system_arm_netbsd,
+                              system_arm_gba,system_arm_nds,
+                              system_arm_aros];
          flags : [af_outputbinary,af_smartlink_sections,af_supports_dwarf];
          flags : [af_outputbinary,af_smartlink_sections,af_supports_dwarf];
          labelprefix : '.L';
          labelprefix : '.L';
          comment : '';
          comment : '';

+ 84 - 9
compiler/arm/cpuinfo.pas

@@ -14,6 +14,8 @@
 
 
 Unit CPUInfo;
 Unit CPUInfo;
 
 
+{$i fpcdefs.inc}
+
 Interface
 Interface
 
 
   uses
   uses
@@ -52,6 +54,9 @@ Type
        cpu_armv7r,
        cpu_armv7r,
        cpu_armv7m,
        cpu_armv7m,
        cpu_armv7em
        cpu_armv7em
+       { when new elements are added afterwards, also
+         update class procedure tarmnodeutils.InsertObjectInfo in narmutil.pas
+       }
       );
       );
 
 
    tinstructionset = (is_thumb,is_arm);
    tinstructionset = (is_thumb,is_arm);
@@ -66,11 +71,35 @@ Type
       fpu_fpa11,
       fpu_fpa11,
       fpu_vfpv2,
       fpu_vfpv2,
       fpu_vfpv3,
       fpu_vfpv3,
+      fpu_neon_vfpv3,
       fpu_vfpv3_d16,
       fpu_vfpv3_d16,
       fpu_fpv4_s16,
       fpu_fpv4_s16,
-      fpu_vfpv4
+      fpu_vfpv4,
+      fpu_neon_vfpv4
+      { when new elements are added afterwards, also update fpu_vfp_last below and
+        class procedure tarmnodeutils.InsertObjectInfo in narmutil.pas }
      );
      );
 
 
+Const
+   fpu_vfp_first = fpu_vfpv2;
+   fpu_vfp_last  = fpu_neon_vfpv4;
+
+  fputypestrllvm : array[tfputype] of string[14] = ('',
+    '',
+    '',
+    '',
+    '',
+    '',
+    'fpu=vfpv2',
+    'fpu=vfpv3',
+    'fpu=neon-vfpv3',
+    'fpu=vfpv3-d16',
+    'fpu=vfpv4-s16',
+    'fpu=vfpv4',
+    'fpu=neon-vfpv4'
+  );
+
+Type
    tcontrollertype =
    tcontrollertype =
      (ct_none,
      (ct_none,
 
 
@@ -208,6 +237,12 @@ Type
       ct_stm32f051r4,
       ct_stm32f051r4,
       ct_stm32f051r6,
       ct_stm32f051r6,
       ct_stm32f051r8,
       ct_stm32f051r8,
+      ct_stm32f091cc,
+      ct_stm32f091cb,
+      ct_stm32f091rc,
+      ct_stm32f091rb,
+      ct_stm32f091vc,
+      ct_stm32f091vb,
       ct_stm32f100x4, // LD&MD value line, 4=16,6=32,8=64,b=128
       ct_stm32f100x4, // LD&MD value line, 4=16,6=32,8=64,b=128
       ct_stm32f100x6,
       ct_stm32f100x6,
       ct_stm32f100x8,
       ct_stm32f100x8,
@@ -473,6 +508,9 @@ Type
       ct_nrf52832_xxaa,
       ct_nrf52832_xxaa,
       ct_nrf52840_xxaa,
       ct_nrf52840_xxaa,
 
 
+      { Raspberry Pi 2 }
+      ct_raspi2,
+
       // generic Thumb2 target
       // generic Thumb2 target
       ct_thumb2bare
       ct_thumb2bare
      );
      );
@@ -489,8 +527,6 @@ Const
    ControllerSupport = true;
    ControllerSupport = true;
    {# Size of native extended floating point type }
    {# Size of native extended floating point type }
    extended_size = 12;
    extended_size = 12;
-   {# Size of a multimedia register               }
-   mmreg_size = 16;
    { target cpu string (used by compiler options) }
    { target cpu string (used by compiler options) }
    target_cpu_string = 'arm';
    target_cpu_string = 'arm';
 
 
@@ -534,7 +570,8 @@ Const
      'ARMV7EM'
      'ARMV7EM'
    );
    );
 
 
-   fputypestr : array[tfputype] of string[9] = ('',
+   fputypestr : array[tfputype] of string[10] = (
+     'NONE',
      'SOFT',
      'SOFT',
      'LIBGCC',
      'LIBGCC',
      'FPA',
      'FPA',
@@ -542,9 +579,11 @@ Const
      'FPA11',
      'FPA11',
      'VFPV2',
      'VFPV2',
      'VFPV3',
      'VFPV3',
+     'NEON_VFPV3',
      'VFPV3_D16',
      'VFPV3_D16',
      'FPV4_S16',
      'FPV4_S16',
-     'VFPV4'
+     'VFPV4',
+     'NEON_VFPV4'
    );
    );
 
 
 
 
@@ -717,6 +756,12 @@ Const
       (controllertypestr:'STM32F051R4';     controllerunitstr:'STM32F0XX';        cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00004000; srambase:$20000000; sramsize:$00001000),
       (controllertypestr:'STM32F051R4';     controllerunitstr:'STM32F0XX';        cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00004000; srambase:$20000000; sramsize:$00001000),
       (controllertypestr:'STM32F051R6';     controllerunitstr:'STM32F0XX';        cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00008000; srambase:$20000000; sramsize:$00001000),
       (controllertypestr:'STM32F051R6';     controllerunitstr:'STM32F0XX';        cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00008000; srambase:$20000000; sramsize:$00001000),
       (controllertypestr:'STM32F051R8';     controllerunitstr:'STM32F0XX';        cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00010000; srambase:$20000000; sramsize:$00002000),
       (controllertypestr:'STM32F051R8';     controllerunitstr:'STM32F0XX';        cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00010000; srambase:$20000000; sramsize:$00002000),
+      (controllertypestr:'STM32F091CC';     controllerunitstr:'STM32F0XX';        cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00040000; srambase:$20000000; sramsize:$00008000),
+      (controllertypestr:'STM32F091CB';     controllerunitstr:'STM32F0XX';        cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00020000; srambase:$20000000; sramsize:$00008000),
+      (controllertypestr:'STM32F091RC';     controllerunitstr:'STM32F0XX';        cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00040000; srambase:$20000000; sramsize:$00008000),
+      (controllertypestr:'STM32F091RB';     controllerunitstr:'STM32F0XX';        cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00020000; srambase:$20000000; sramsize:$00008000),
+      (controllertypestr:'STM32F091VC';     controllerunitstr:'STM32F0XX';        cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00040000; srambase:$20000000; sramsize:$00008000),
+      (controllertypestr:'STM32F091VB';     controllerunitstr:'STM32F0XX';        cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00020000; srambase:$20000000; sramsize:$00008000),
 
 
       { STM32F1 series }
       { STM32F1 series }
       (controllertypestr:'STM32F100X4';     controllerunitstr:'STM32F10X_LD';     cputype:cpu_armv7m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00004000; srambase:$20000000; sramsize:$00001000),
       (controllertypestr:'STM32F100X4';     controllerunitstr:'STM32F10X_LD';     cputype:cpu_armv7m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00004000; srambase:$20000000; sramsize:$00001000),
@@ -983,24 +1028,25 @@ Const
       (controllertypestr:'NRF52832_XXAA'; controllerunitstr:'NRF52'; cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$00000000; flashsize:$00080000; srambase:$20000000; sramsize:$00010000),
       (controllertypestr:'NRF52832_XXAA'; controllerunitstr:'NRF52'; cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$00000000; flashsize:$00080000; srambase:$20000000; sramsize:$00010000),
       (controllertypestr:'NRF52840_XXAA'; controllerunitstr:'NRF52'; cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$00000000; flashsize:$00080000; srambase:$20000000; sramsize:$00010000),
       (controllertypestr:'NRF52840_XXAA'; controllerunitstr:'NRF52'; cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$00000000; flashsize:$00080000; srambase:$20000000; sramsize:$00010000),
       
       
+      { Raspberry Pi 2 }
+      (controllertypestr:'RASPI2'; controllerunitstr:'RASPI2'; cputype:cpu_armv7a; fputype:fpu_vfpv4; flashbase:$00000000; flashsize:$00000000; srambase:$00008000; sramsize:$10000000),
+
       { Bare bones }
       { Bare bones }
       (controllertypestr:'THUMB2_BARE';	controllerunitstr:'THUMB2_BARE';	cputype:cpu_armv7m; fputype:fpu_soft; flashbase:$00000000;	flashsize:$00002000;	srambase:$20000000;	sramsize:$00000400)
       (controllertypestr:'THUMB2_BARE';	controllerunitstr:'THUMB2_BARE';	cputype:cpu_armv7m; fputype:fpu_soft; flashbase:$00000000;	flashsize:$00002000;	srambase:$20000000;	sramsize:$00000400)
     );
     );
 
 
-   vfp_scalar = [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16,fpu_fpv4_s16];
-
    { Supported optimizations, only used for information }
    { Supported optimizations, only used for information }
    supported_optimizerswitches = genericlevel1optimizerswitches+
    supported_optimizerswitches = genericlevel1optimizerswitches+
                                  genericlevel2optimizerswitches+
                                  genericlevel2optimizerswitches+
                                  genericlevel3optimizerswitches-
                                  genericlevel3optimizerswitches-
                                  { no need to write info about those }
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
-                                 [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,
+                                 [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_loopunroll,cs_opt_tailrecursion,
                                   cs_opt_stackframe,cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath,cs_opt_forcenostackframe];
                                   cs_opt_stackframe,cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath,cs_opt_forcenostackframe];
 
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
-     [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
+     [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [cs_opt_scheduler{,cs_opt_loopunroll}];
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [cs_opt_scheduler{,cs_opt_loopunroll}];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
 
 
@@ -1022,6 +1068,19 @@ Const
        CPUARM_HAS_UMULL
        CPUARM_HAS_UMULL
       );
       );
 
 
+   tfpuflags =
+      (
+        FPUARM_HAS_FPA,                { fpu is an fpa based FPU                                                               }
+        FPUARM_HAS_VFP_EXTENSION,      { fpu is a vfp extension                                                                }
+        FPUARM_HAS_VFP_DOUBLE,         { vfp has double support                                                                }
+        FPUARM_HAS_VFP_SINGLE_ONLY,    { vfp has only single support, disjoint from FPUARM_HAS_VFP_DOUBLE, for error checking  }
+        FPUARM_HAS_32REGS,             { vfp has 32 regs, without this flag, 16 are assumed                                    }
+        FPUARM_HAS_VMOV_CONST,         { vmov supports (some) real constants                                                   }
+        FPUARM_HAS_EXCEPTION_TRAPPING, { vfp does exceptions trapping                                                          }
+        FPUARM_HAS_NEON,               { fpu has neon extensions                                                               }
+        FPUARM_HAS_FMA                 { fpu has fused multiply/add instructions                                               }
+      );
+
  const
  const
    cpu_capabilities : array[tcputype] of set of tcpuflags =
    cpu_capabilities : array[tcputype] of set of tcpuflags =
      ( { cpu_none     } [],
      ( { cpu_none     } [],
@@ -1045,6 +1104,22 @@ Const
        { cpu_armv7em  } [CPUARM_HAS_ALL_MEM,CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_THUMB_IDIV,CPUARM_HAS_DMB,CPUARM_HAS_THUMB2,CPUARM_HAS_UMULL]
        { cpu_armv7em  } [CPUARM_HAS_ALL_MEM,CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_THUMB_IDIV,CPUARM_HAS_DMB,CPUARM_HAS_THUMB2,CPUARM_HAS_UMULL]
      );
      );
 
 
+     fpu_capabilities : array[tfputype] of set of tfpuflags =
+       ( { fpu_none       } [],
+         { fpu_soft       } [],
+         { fpu_libgcc     } [],
+         { fpu_fpa        } [FPUARM_HAS_FPA],
+         { fpu_fpa10      } [FPUARM_HAS_FPA],
+         { fpu_fpa11      } [FPUARM_HAS_FPA],
+         { fpu_vfpv2      } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE],
+         { fpu_vfpv3      } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST],
+         { fpu_neon_vfpv3 } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_NEON],
+         { fpu_vfpv3_d16  } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_VMOV_CONST],
+         { fpu_fpv4_s16   } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_SINGLE_ONLY,FPUARM_HAS_VMOV_CONST],
+         { fpu_vfpv4      } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_FMA],
+         { fpu_neon_vfpv4 } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_NEON,FPUARM_HAS_FMA]
+       );
+
    { contains all CPU supporting any kind of thumb instruction set }
    { contains all CPU supporting any kind of thumb instruction set }
    cpu_has_thumb = [cpu_armv4t,cpu_armv5t,cpu_armv5te,cpu_armv5tej,cpu_armv6t2,cpu_armv6z,cpu_armv6m,cpu_armv7a,cpu_armv7r,cpu_armv7m,cpu_armv7em];
    cpu_has_thumb = [cpu_armv4t,cpu_armv5t,cpu_armv5te,cpu_armv5tej,cpu_armv6t2,cpu_armv6z,cpu_armv6m,cpu_armv7a,cpu_armv7r,cpu_armv7m,cpu_armv7em];
 
 

+ 9 - 3
compiler/arm/cpunode.pas

@@ -30,21 +30,27 @@ unit cpunode;
     uses
     uses
        { generic nodes }
        { generic nodes }
        ncgbas,ncgld,ncgflw,ncgcnv,ncgmem,ncgcon,ncgcal,ncgset,ncginl,ncgopt,ncgmat,ncgobjc,
        ncgbas,ncgld,ncgflw,ncgcnv,ncgmem,ncgcon,ncgcal,ncgset,ncginl,ncgopt,ncgmat,ncgobjc,
+       { symtable }
+       symcpu,
+       aasmdef,
        { to be able to only parts of the generic code,
        { to be able to only parts of the generic code,
          the processor specific nodes must be included
          the processor specific nodes must be included
          after the generic one (FK)
          after the generic one (FK)
        }
        }
+{$ifndef llvm}
        narmadd,
        narmadd,
        narmcal,
        narmcal,
        narmmat,
        narmmat,
        narminl,
        narminl,
+       narmld,
        narmcnv,
        narmcnv,
        narmcon,
        narmcon,
        narmset,
        narmset,
        narmmem,
        narmmem,
-       { symtable }
-       symcpu,
-       aasmdef
+       narmutil
+{$else}
+       llvmnode
+{$endif}
        ;
        ;
 
 
 
 

+ 214 - 63
compiler/arm/cpupara.pas

@@ -30,32 +30,36 @@ unit cpupara;
        globtype,globals,
        globtype,globals,
        aasmdata,
        aasmdata,
        cpuinfo,cpubase,cgbase,cgutils,
        cpuinfo,cpubase,cgbase,cgutils,
-       symconst,symtype,symdef,parabase,paramgr;
+       symconst,symtype,symdef,parabase,paramgr,armpara;
 
 
     type
     type
-       tcpuparamanager = class(tparamanager)
+       tcpuparamanager = class(tarmgenparamanager)
           function get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;override;
+          function get_saved_registers_int(calloption : tproccalloption):tcpuregisterarray;override;
           function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
           function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
           function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
           function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
-          procedure getintparaloc(list: TAsmList; pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);override;
+          procedure getcgtempparaloc(list: TAsmList; pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
-          function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
+          function create_varargs_paraloc_info(p : tabstractprocdef; side: tcallercallee; varargspara:tvarargsparalist):longint;override;
           function get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
           function get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
          private
          private
+          function usemmpararegs(calloption: tproccalloption; variadic: boolean): boolean;
+          function getparaloc(calloption : tproccalloption; p : tdef; isvariadic: boolean) : tcgloc;
           procedure init_values(p: tabstractprocdef; side: tcallercallee; var curintreg,
           procedure init_values(p: tabstractprocdef; side: tcallercallee; var curintreg,
             curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword;
             curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword;
             var sparesinglereg: tregister);
             var sparesinglereg: tregister);
           function create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
           function create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
             var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister; isvariadic: boolean):longint;
             var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister; isvariadic: boolean):longint;
+          procedure paradeftointparaloc(paradef: tdef; paracgsize: tcgsize; out paralocdef: tdef; out paralocsize: tcgsize);
        end;
        end;
 
 
   implementation
   implementation
 
 
     uses
     uses
        verbose,systems,cutils,
        verbose,systems,cutils,
-       defutil,symsym,symcpu,symtable,
+       defutil,symsym,symcpu,symtable,symutil,
        { PowerPC uses procinfo as well in cpupara, so this should not hurt }
        { PowerPC uses procinfo as well in cpupara, so this should not hurt }
        procinfo;
        procinfo;
 
 
@@ -81,7 +85,16 @@ unit cpupara;
       end;
       end;
 
 
 
 
-    procedure tcpuparamanager.getintparaloc(list: TAsmList; pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
+    function tcpuparamanager.get_saved_registers_int(calloption : tproccalloption):tcpuregisterarray;
+      const
+        saved_regs : {$ifndef VER3_0}tcpuregisterarray{$else}array[0..6] of tsuperregister{$endif} =
+          (RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,RS_R9,RS_R10);
+      begin
+        result:=saved_regs;
+      end;
+
+
+    procedure tcpuparamanager.getcgtempparaloc(list: TAsmList; pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
       var
       var
         paraloc : pcgparalocation;
         paraloc : pcgparalocation;
         psym : tparavarsym;
         psym : tparavarsym;
@@ -120,7 +133,9 @@ unit cpupara;
       end;
       end;
 
 
 
 
-    function getparaloc(calloption : tproccalloption; p : tdef; isvariadic: boolean) : tcgloc;
+    function tcpuparamanager.getparaloc(calloption : tproccalloption; p : tdef; isvariadic: boolean) : tcgloc;
+      var
+        basedef: tdef;
       begin
       begin
          { Later, the LOC_REFERENCE is in most cases changed into LOC_REGISTER
          { Later, the LOC_REFERENCE is in most cases changed into LOC_REGISTER
            if push_addr_param for the def is true
            if push_addr_param for the def is true
@@ -134,7 +149,7 @@ unit cpupara;
                 getparaloc:=LOC_MMREGISTER
                 getparaloc:=LOC_MMREGISTER
               else if (calloption in cdecl_pocalls) or
               else if (calloption in cdecl_pocalls) or
                  (cs_fp_emulation in current_settings.moduleswitches) or
                  (cs_fp_emulation in current_settings.moduleswitches) or
-                 (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16,fpu_fpv4_s16]) then
+                 (current_settings.fputype in [fpu_vfp_first..fpu_vfp_last]) then
                 { the ARM eabi also allows passing VFP values via VFP registers,
                 { the ARM eabi also allows passing VFP values via VFP registers,
                   but Mac OS X doesn't seem to do that and linux only does it if
                   but Mac OS X doesn't seem to do that and linux only does it if
                   built with the "-mfloat-abi=hard" option }
                   built with the "-mfloat-abi=hard" option }
@@ -150,7 +165,11 @@ unit cpupara;
             classrefdef:
             classrefdef:
               getparaloc:=LOC_REGISTER;
               getparaloc:=LOC_REGISTER;
             recorddef:
             recorddef:
-              getparaloc:=LOC_REGISTER;
+              if usemmpararegs(calloption,isvariadic) and
+                 is_hfa(p,basedef) then
+                getparaloc:=LOC_MMREGISTER
+              else
+                getparaloc:=LOC_REGISTER;
             objectdef:
             objectdef:
               getparaloc:=LOC_REGISTER;
               getparaloc:=LOC_REGISTER;
             stringdef:
             stringdef:
@@ -165,6 +184,9 @@ unit cpupara;
             arraydef:
             arraydef:
               if is_dynamic_array(p) then
               if is_dynamic_array(p) then
                 getparaloc:=LOC_REGISTER
                 getparaloc:=LOC_REGISTER
+              else if usemmpararegs(calloption,isvariadic) and
+                 is_hfa(p,basedef) then
+                getparaloc:=LOC_MMREGISTER
               else
               else
                 getparaloc:=LOC_REFERENCE;
                 getparaloc:=LOC_REFERENCE;
             setdef:
             setdef:
@@ -210,6 +232,8 @@ unit cpupara;
             result:=not is_smallset(def);
             result:=not is_smallset(def);
           stringdef :
           stringdef :
             result:=tstringdef(def).stringtype in [st_shortstring,st_longstring];
             result:=tstringdef(def).stringtype in [st_shortstring,st_longstring];
+          else
+            ;
         end;
         end;
       end;
       end;
 
 
@@ -218,12 +242,19 @@ unit cpupara;
       var
       var
         i: longint;
         i: longint;
         sym: tsym;
         sym: tsym;
+        basedef: tdef;
       begin
       begin
         if handle_common_ret_in_param(def,pd,result) then
         if handle_common_ret_in_param(def,pd,result) then
           exit;
           exit;
         case def.typ of
         case def.typ of
           recorddef:
           recorddef:
             begin
             begin
+              if usemmpararegs(pd.proccalloption,is_c_variadic(pd)) and
+                 is_hfa(def,basedef) then
+                begin
+                  result:=false;
+                  exit;
+                end;
               result:=def.size>4;
               result:=def.size>4;
               if not result and
               if not result and
                  (target_info.abi in [abi_default,abi_armeb]) then
                  (target_info.abi in [abi_default,abi_armeb]) then
@@ -263,7 +294,7 @@ unit cpupara;
                   for i:=0 to trecorddef(def).symtable.SymList.count-1 do
                   for i:=0 to trecorddef(def).symtable.SymList.count-1 do
                     begin
                     begin
                       sym:=tsym(trecorddef(def).symtable.SymList[i]);
                       sym:=tsym(trecorddef(def).symtable.SymList[i]);
-                      if sym.typ<>fieldvarsym then
+                      if not is_normal_fieldvarsym(sym) then
                         continue;
                         continue;
                       { bitfield -> ignore }
                       { bitfield -> ignore }
                       if (trecordsymtable(trecorddef(def).symtable).usefieldalignment=bit_alignment) and
                       if (trecordsymtable(trecorddef(def).symtable).usefieldalignment=bit_alignment) and
@@ -316,11 +347,13 @@ unit cpupara;
 
 
       var
       var
         nextintreg,nextfloatreg,nextmmreg : tsuperregister;
         nextintreg,nextfloatreg,nextmmreg : tsuperregister;
-        paradef : tdef;
+        paradef,
+        hfabasedef : tdef;
         paraloc : pcgparalocation;
         paraloc : pcgparalocation;
         stack_offset : aword;
         stack_offset : aword;
         hp : tparavarsym;
         hp : tparavarsym;
         loc : tcgloc;
         loc : tcgloc;
+        hfabasesize  : tcgsize;
         paracgsize   : tcgsize;
         paracgsize   : tcgsize;
         paralen : longint;
         paralen : longint;
         i : integer;
         i : integer;
@@ -348,6 +381,31 @@ unit cpupara;
         end;
         end;
 
 
 
 
+      procedure updatemmregs(paradef, basedef: tdef);
+        var
+          regsavailable,
+          regsneeded: longint;
+          basesize: asizeint;
+        begin
+          basesize:=basedef.size;
+          regsneeded:=paradef.size div basesize;
+          regsavailable:=ord(RS_D7)-ord(nextmmreg)+1;
+          case basesize of
+            4:
+              regsavailable:=regsavailable*2+ord(sparesinglereg<>NR_NO);
+            8:
+              ;
+            else
+              internalerror(2019022301);
+          end;
+          if regsavailable<regsneeded then
+            begin
+              nextmmreg:=succ(RS_D7);
+              sparesinglereg:=NR_NO;
+            end;
+        end;
+
+
       begin
       begin
         result:=0;
         result:=0;
         nextintreg:=curintreg;
         nextintreg:=curintreg;
@@ -367,6 +425,11 @@ unit cpupara;
             if (p.proccalloption in cstylearrayofconst) and
             if (p.proccalloption in cstylearrayofconst) and
                is_array_of_const(paradef) then
                is_array_of_const(paradef) then
               begin
               begin
+                hp.paraloc[side].def:=paradef;
+                hp.paraloc[side].size:=OS_NO;
+                hp.paraloc[side].alignment:=std_param_align;
+                hp.paraloc[side].intsize:=0;
+
                 paraloc:=hp.paraloc[side].add_location;
                 paraloc:=hp.paraloc[side].add_location;
                 { hack: the paraloc must be valid, but is not actually used }
                 { hack: the paraloc must be valid, but is not actually used }
                 paraloc^.loc:=LOC_REGISTER;
                 paraloc^.loc:=LOC_REGISTER;
@@ -413,6 +476,18 @@ unit cpupara;
              hp.paraloc[side].def:=paradef;
              hp.paraloc[side].def:=paradef;
              firstparaloc:=true;
              firstparaloc:=true;
 
 
+             if (loc=LOC_MMREGISTER) and
+                is_hfa(paradef,hfabasedef) then
+               begin
+                 updatemmregs(paradef,hfabasedef);
+                 hfabasesize:=def_cgsize(hfabasedef);
+               end
+             else
+               begin
+                 hfabasedef:=nil;
+                 hfabasesize:=OS_NO;
+               end;
+
 {$ifdef EXTDEBUG}
 {$ifdef EXTDEBUG}
              if paralen=0 then
              if paralen=0 then
                internalerror(200410311);
                internalerror(200410311);
@@ -420,59 +495,44 @@ unit cpupara;
              while paralen>0 do
              while paralen>0 do
                begin
                begin
                  paraloc:=hp.paraloc[side].add_location;
                  paraloc:=hp.paraloc[side].add_location;
-
-                 if (loc=LOC_REGISTER) and (paracgsize in [OS_F32,OS_F64,OS_F80]) then
-                   case paracgsize of
-                     OS_F32:
-                       begin
-                         paraloc^.size:=OS_32;
-                         paraloc^.def:=u32inttype;
-                       end;
-                     OS_F64:
-                       begin
-                         paraloc^.size:=OS_32;
-                         paraloc^.def:=u32inttype;
-                       end;
-                     else
-                       internalerror(2005082901);
-                   end
-                 else if (paracgsize in [OS_NO,OS_64,OS_S64]) then
-                   begin
-                     paraloc^.size:=OS_32;
-                     paraloc^.def:=u32inttype;
-                   end
-                 else
-                   begin
-                     paraloc^.size:=paracgsize;
-                     paraloc^.def:=get_paraloc_def(paradef,paralen,firstparaloc);
-                   end;
                  case loc of
                  case loc of
                     LOC_REGISTER:
                     LOC_REGISTER:
                       begin
                       begin
+                        if paracgsize in [OS_F32,OS_F64,OS_F80] then
+                          case paracgsize of
+                            OS_F32,
+                            OS_F64:
+                              begin
+                                paraloc^.size:=OS_32;
+                                paraloc^.def:=u32inttype;
+                              end;
+                            else
+                              internalerror(2005082901);
+                          end;
                         { align registers for eabi }
                         { align registers for eabi }
                         if (target_info.abi in [abi_eabi,abi_eabihf]) and
                         if (target_info.abi in [abi_eabi,abi_eabihf]) and
                            firstparaloc and
                            firstparaloc and
                            (paradef.alignment=8) then
                            (paradef.alignment=8) then
                           begin
                           begin
+                            hp.paraloc[side].Alignment:=8;
                             if (nextintreg in [RS_R1,RS_R3]) then
                             if (nextintreg in [RS_R1,RS_R3]) then
                               inc(nextintreg)
                               inc(nextintreg)
                             else if nextintreg>RS_R3 then
                             else if nextintreg>RS_R3 then
                               stack_offset:=align(stack_offset,8);
                               stack_offset:=align(stack_offset,8);
                           end;
                           end;
-                        { this is not abi compliant
-                          why? (FK) }
                         if nextintreg<=RS_R3 then
                         if nextintreg<=RS_R3 then
                           begin
                           begin
+                            paradeftointparaloc(paradef,paracgsize,paraloc^.def,paraloc^.size);
                             paraloc^.loc:=LOC_REGISTER;
                             paraloc^.loc:=LOC_REGISTER;
                             paraloc^.register:=newreg(R_INTREGISTER,nextintreg,R_SUBWHOLE);
                             paraloc^.register:=newreg(R_INTREGISTER,nextintreg,R_SUBWHOLE);
                             inc(nextintreg);
                             inc(nextintreg);
                           end
                           end
                         else
                         else
                           begin
                           begin
-                            { LOC_REFERENCE always contains everything that's left }
+                            { LOC_REFERENCE always contains everything that's left as a multiple of 4 bytes }
                             paraloc^.loc:=LOC_REFERENCE;
                             paraloc^.loc:=LOC_REFERENCE;
-                            paraloc^.size:=int_cgsize(paralen);
-                            paraloc^.def:=carraydef.getreusable_no_free(u8inttype,paralen);
+                            paraloc^.def:=get_paraloc_def(paradef,paralen,firstparaloc);
+                            paraloc^.size:=def_cgsize(paraloc^.def);
                             if (side=callerside) then
                             if (side=callerside) then
                               paraloc^.reference.index:=NR_STACK_POINTER_REG;
                               paraloc^.reference.index:=NR_STACK_POINTER_REG;
                             paraloc^.reference.offset:=stack_offset;
                             paraloc^.reference.offset:=stack_offset;
@@ -482,6 +542,8 @@ unit cpupara;
                       end;
                       end;
                     LOC_FPUREGISTER:
                     LOC_FPUREGISTER:
                       begin
                       begin
+                        paraloc^.size:=paracgsize;
+                        paraloc^.def:=paradef;
                         if nextfloatreg<=RS_F3 then
                         if nextfloatreg<=RS_F3 then
                           begin
                           begin
                             paraloc^.loc:=LOC_FPUREGISTER;
                             paraloc^.loc:=LOC_FPUREGISTER;
@@ -509,8 +571,18 @@ unit cpupara;
                       end;
                       end;
                     LOC_MMREGISTER:
                     LOC_MMREGISTER:
                       begin
                       begin
+                        if assigned(hfabasedef) then
+                          begin
+                            paraloc^.def:=hfabasedef;
+                            paraloc^.size:=hfabasesize;
+                          end
+                        else
+                          begin
+                            paraloc^.size:=paracgsize;
+                            paraloc^.def:=paradef;
+                          end;
                         if (nextmmreg<=RS_D7) or
                         if (nextmmreg<=RS_D7) or
-                           ((paraloc^.size = OS_F32) and
+                           ((paraloc^.size=OS_F32) and
                             (sparesinglereg<>NR_NO)) then
                             (sparesinglereg<>NR_NO)) then
                           begin
                           begin
                             paraloc^.loc:=LOC_MMREGISTER;
                             paraloc^.loc:=LOC_MMREGISTER;
@@ -546,7 +618,6 @@ unit cpupara;
                             { LOC_REFERENCE always contains everything that's left }
                             { LOC_REFERENCE always contains everything that's left }
                             paraloc^.loc:=LOC_REFERENCE;
                             paraloc^.loc:=LOC_REFERENCE;
                             paraloc^.size:=int_cgsize(paralen);
                             paraloc^.size:=int_cgsize(paralen);
-                            paraloc^.def:=carraydef.getreusable_no_free(u8inttype,paralen);
                             if (side=callerside) then
                             if (side=callerside) then
                               paraloc^.reference.index:=NR_STACK_POINTER_REG;
                               paraloc^.reference.index:=NR_STACK_POINTER_REG;
                             paraloc^.reference.offset:=stack_offset;
                             paraloc^.reference.offset:=stack_offset;
@@ -556,6 +627,8 @@ unit cpupara;
                       end;
                       end;
                     LOC_REFERENCE:
                     LOC_REFERENCE:
                       begin
                       begin
+                        paraloc^.size:=paracgsize;
+                        paraloc^.def:=paradef;
                         if push_addr_param(hp.varspez,paradef,p.proccalloption) then
                         if push_addr_param(hp.varspez,paradef,p.proccalloption) then
                           begin
                           begin
                             paraloc^.size:=OS_ADDR;
                             paraloc^.size:=OS_ADDR;
@@ -568,10 +641,11 @@ unit cpupara;
                             if (target_info.abi in [abi_eabi,abi_eabihf]) and
                             if (target_info.abi in [abi_eabi,abi_eabihf]) and
                                firstparaloc and
                                firstparaloc and
                                (paradef.alignment=8) then
                                (paradef.alignment=8) then
-                              stack_offset:=align(stack_offset,8);
+                              begin
+                                stack_offset:=align(stack_offset,8);
+                                hp.paraloc[side].Alignment:=8;
+                              end;
 
 
-                             paraloc^.size:=paracgsize;
-                             paraloc^.def:=paradef;
                              paraloc^.loc:=LOC_REFERENCE;
                              paraloc^.loc:=LOC_REFERENCE;
                              paraloc^.reference.index:=NR_STACK_POINTER_REG;
                              paraloc^.reference.index:=NR_STACK_POINTER_REG;
                              paraloc^.reference.offset:=stack_offset;
                              paraloc^.reference.offset:=stack_offset;
@@ -614,41 +688,101 @@ unit cpupara;
       end;
       end;
 
 
 
 
+    procedure tcpuparamanager.paradeftointparaloc(paradef: tdef; paracgsize: tcgsize; out paralocdef: tdef; out paralocsize: tcgsize);
+      begin
+        if not(paracgsize in [OS_32,OS_S32]) or
+           (paradef.typ in [arraydef,recorddef]) or
+           is_object(paradef) then
+          begin
+            paralocsize:=OS_32;
+            paralocdef:=u32inttype;
+          end
+        else
+          begin
+            paralocsize:=paracgsize;
+            paralocdef:=paradef;
+          end;
+      end;
+
+
     function  tcpuparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;
     function  tcpuparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;
       var
       var
-        paraloc : pcgparalocation;
+        paraloc: pcgparalocation;
         retcgsize  : tcgsize;
         retcgsize  : tcgsize;
+        basedef: tdef;
+        i: longint;
+        sparesinglereg: tregister;
+        mmreg : TSuperRegister;
       begin
       begin
          if set_common_funcretloc_info(p,forcetempdef,retcgsize,result) then
          if set_common_funcretloc_info(p,forcetempdef,retcgsize,result) then
            exit;
            exit;
 
 
         paraloc:=result.add_location;
         paraloc:=result.add_location;
         { Return in FPU register? }
         { Return in FPU register? }
-        if result.def.typ=floatdef then
+        basedef:=nil;
+        sparesinglereg:=NR_NO;
+        if (result.def.typ=floatdef) or
+           is_hfa(result.def,basedef) then
           begin
           begin
-            if (target_info.abi=abi_eabihf) or (p.proccalloption=pocall_hardfloat) then
+            if usemmpararegs(p.proccalloption,is_c_variadic(p)) then
               begin
               begin
-                paraloc^.loc:=LOC_MMREGISTER;
+                if assigned(basedef) then
+                  begin
+                    for i:=2 to result.def.size div basedef.size do
+                      result.add_location;
+                    retcgsize:=def_cgsize(basedef);
+                  end
+                else
+                  basedef:=result.def;
                 case retcgsize of
                 case retcgsize of
                   OS_64,
                   OS_64,
                   OS_F64:
                   OS_F64:
                     begin
                     begin
-                      paraloc^.register:=NR_MM_RESULT_REG;
+                      mmreg:=RS_D0;
                     end;
                     end;
                   OS_32,
                   OS_32,
                   OS_F32:
                   OS_F32:
                     begin
                     begin
-                      paraloc^.register:=NR_S0;
+                      mmreg:=RS_S0;
                     end;
                     end;
                   else
                   else
                     internalerror(2012032501);
                     internalerror(2012032501);
                 end;
                 end;
-                paraloc^.size:=retcgsize;
-                paraloc^.def:=result.def;
+                repeat
+                  paraloc^.loc:=LOC_MMREGISTER;
+                  { mm registers are strangely ordered in the arm compiler }
+                  case retcgsize of
+                    OS_32,OS_F32:
+                      begin
+                        if sparesinglereg=NR_NO then
+                          begin
+                            paraloc^.register:=newreg(R_MMREGISTER,mmreg,R_SUBFS);
+                            sparesinglereg:=newreg(R_MMREGISTER,mmreg-RS_S0+RS_S1,R_SUBFS);
+                            inc(mmreg);
+                          end
+                        else
+                          begin
+                            paraloc^.register:=sparesinglereg;
+                            sparesinglereg:=NR_NO;
+                          end;
+                      end;
+                    OS_64,OS_F64:
+                      begin
+                        paraloc^.register:=newreg(R_MMREGISTER,mmreg,R_SUBFD);
+                        inc(mmreg);
+                      end;
+                    else
+                      Internalerror(2019081201);
+                  end;
+
+                  paraloc^.size:=retcgsize;
+                  paraloc^.def:=basedef;
+                  paraloc:=paraloc^.next;
+                until not assigned(paraloc);
               end
               end
             else if (p.proccalloption in [pocall_softfloat]) or
             else if (p.proccalloption in [pocall_softfloat]) or
                (cs_fp_emulation in current_settings.moduleswitches) or
                (cs_fp_emulation in current_settings.moduleswitches) or
-               (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16,fpu_fpv4_s16]) then
+               (current_settings.fputype in [fpu_vfp_first..fpu_vfp_last]) then
               begin
               begin
                 case retcgsize of
                 case retcgsize of
                   OS_64,
                   OS_64,
@@ -730,8 +864,7 @@ unit cpupara;
                     end;
                     end;
                   else
                   else
                     begin
                     begin
-                      paraloc^.size:=retcgsize;
-                      paraloc^.def:=result.def;
+                      paradeftointparaloc(result.def,result.size,paraloc^.def,paraloc^.size);
                     end;
                     end;
                 end;
                 end;
               end;
               end;
@@ -739,6 +872,14 @@ unit cpupara;
       end;
       end;
 
 
 
 
+    function tcpuparamanager.usemmpararegs(calloption: tproccalloption; variadic: boolean): boolean;
+      begin
+        result:=
+         ((target_info.abi=abi_eabihf) or (calloption=pocall_hardfloat)) and
+          (not variadic);
+      end;
+
+
     function tcpuparamanager.create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;
     function tcpuparamanager.create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;
       var
       var
         cur_stack_offset: aword;
         cur_stack_offset: aword;
@@ -753,20 +894,30 @@ unit cpupara;
      end;
      end;
 
 
 
 
-    function tcpuparamanager.create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;
+    function tcpuparamanager.create_varargs_paraloc_info(p : tabstractprocdef; side: tcallercallee; varargspara:tvarargsparalist):longint;
       var
       var
         cur_stack_offset: aword;
         cur_stack_offset: aword;
         curintreg, curfloatreg, curmmreg: tsuperregister;
         curintreg, curfloatreg, curmmreg: tsuperregister;
         sparesinglereg:tregister;
         sparesinglereg:tregister;
       begin
       begin
-        init_values(p,callerside,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg);
+        init_values(p,side,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg);
 
 
-        result:=create_paraloc_info_intern(p,callerside,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,true);
+        result:=create_paraloc_info_intern(p,side,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,true);
         if (p.proccalloption in cstylearrayofconst) then
         if (p.proccalloption in cstylearrayofconst) then
-          { just continue loading the parameters in the registers }
-          result:=create_paraloc_info_intern(p,callerside,varargspara,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,true)
+          begin
+            { just continue loading the parameters in the registers }
+            if assigned(varargspara) then
+              begin
+                if side=callerside then
+                  result:=create_paraloc_info_intern(p,side,varargspara,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,true)
+                else
+                  internalerror(2019021915);
+              end;
+          end
         else
         else
           internalerror(200410231);
           internalerror(200410231);
+
+        create_funcretloc_info(p,side);
       end;
       end;
 
 
 begin
 begin

+ 16 - 7
compiler/arm/cpupi.pas

@@ -49,13 +49,15 @@ unit cpupi;
           procedure generate_parameter_info;override;
           procedure generate_parameter_info;override;
           procedure allocate_got_register(list : TAsmList);override;
           procedure allocate_got_register(list : TAsmList);override;
           procedure postprocess_code;override;
           procedure postprocess_code;override;
+
+          procedure allocate_tls_register(list : TAsmList);override;
        end;
        end;
 
 
 
 
   implementation
   implementation
 
 
     uses
     uses
-       globals,systems,
+       globals,systems,verbose,
        cpubase,
        cpubase,
        tgobj,
        tgobj,
        symconst,symtype,symsym,symcpu,paramgr,
        symconst,symtype,symsym,symcpu,paramgr,
@@ -154,6 +156,10 @@ unit cpupi;
             maxpushedparasize:=align(maxpushedparasize,max(current_settings.alignment.localalignmin,4));
             maxpushedparasize:=align(maxpushedparasize,max(current_settings.alignment.localalignmin,4));
             floatsavesize:=0;
             floatsavesize:=0;
             case current_settings.fputype of
             case current_settings.fputype of
+              fpu_none,
+              fpu_soft,
+              fpu_libgcc:
+                ;
               fpu_fpa,
               fpu_fpa,
               fpu_fpa10,
               fpu_fpa10,
               fpu_fpa11:
               fpu_fpa11:
@@ -172,18 +178,15 @@ unit cpupi;
                   if firstfloatreg<>RS_NO then
                   if firstfloatreg<>RS_NO then
                     floatsavesize:=(lastfloatreg-firstfloatreg+1)*12;
                     floatsavesize:=(lastfloatreg-firstfloatreg+1)*12;
                 end;
                 end;
-              fpu_vfpv2,
-              fpu_vfpv3,
-              fpu_vfpv4,
-              fpu_vfpv3_d16:
+              else if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
                 begin
                 begin
                   floatsavesize:=0;
                   floatsavesize:=0;
                   regs:=cg.rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall);
                   regs:=cg.rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall);
                   for r:=RS_D0 to RS_D31 do
                   for r:=RS_D0 to RS_D31 do
                     if r in regs then
                     if r in regs then
                       inc(floatsavesize,8);
                       inc(floatsavesize,8);
-                end;
-              fpu_fpv4_s16:
+                end
+              else
                 begin
                 begin
                   floatsavesize:=0;
                   floatsavesize:=0;
                   regs:=cg.rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall);
                   regs:=cg.rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall);
@@ -276,6 +279,12 @@ unit cpupi;
         finalizearmcode(aktproccode,aktlocaldata);
         finalizearmcode(aktproccode,aktlocaldata);
       end;
       end;
 
 
+
+    procedure tcpuprocinfo.allocate_tls_register(list: TAsmList);
+      begin
+        current_procinfo.tlsoffset:=cg.getaddressregister(list);
+      end;
+
 begin
 begin
    cprocinfo:=tcpuprocinfo;
    cprocinfo:=tcpuprocinfo;
 end.
 end.

+ 6 - 7
compiler/arm/hlcgcpu.pas

@@ -46,8 +46,6 @@ interface
       procedure a_jmp_external_name(list: TAsmList; const externalname: TSymStr); override;
       procedure a_jmp_external_name(list: TAsmList; const externalname: TSymStr); override;
     end;
     end;
 
 
-  procedure create_hlcodegen;
-
 implementation
 implementation
 
 
   uses
   uses
@@ -66,7 +64,7 @@ implementation
         href : treference;
         href : treference;
         l : TAsmLabel;
         l : TAsmLabel;
       begin
       begin
-        reference_reset_base(href,voidpointertype,NR_R0,0,sizeof(pint),[]);
+        reference_reset_base(href,voidpointertype,NR_R0,0,ctempposinvalid,sizeof(pint),[]);
         if GenerateThumbCode then
         if GenerateThumbCode then
           begin
           begin
             if (href.offset in [0..124]) and ((href.offset mod 4)=0) then
             if (href.offset in [0..124]) and ((href.offset mod 4)=0) then
@@ -111,7 +109,7 @@ implementation
           Internalerror(200006139);
           Internalerror(200006139);
         if GenerateThumbCode then
         if GenerateThumbCode then
           begin
           begin
-            reference_reset_base(href,voidpointertype,NR_R0,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),sizeof(pint),[]);
+            reference_reset_base(href,voidpointertype,NR_R0,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),ctempposinvalid,sizeof(pint),[]);
             if (href.offset in [0..124]) and ((href.offset mod 4)=0) then
             if (href.offset in [0..124]) and ((href.offset mod 4)=0) then
               begin
               begin
                 list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R0]));
                 list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R0]));
@@ -144,7 +142,7 @@ implementation
           end
           end
         else
         else
           begin
           begin
-            reference_reset_base(href,voidpointertype,NR_R12,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),sizeof(pint),[]);
+            reference_reset_base(href,voidpointertype,NR_R12,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),ctempposinvalid,sizeof(pint),[]);
             cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_R12);
             cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_R12);
           end;
           end;
         if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
         if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
@@ -179,7 +177,7 @@ implementation
       if make_global then
       if make_global then
         list.concat(Tai_symbol.Createname_global(labelname,AT_FUNCTION,0,procdef))
         list.concat(Tai_symbol.Createname_global(labelname,AT_FUNCTION,0,procdef))
       else
       else
-        list.concat(Tai_symbol.Createname(labelname,AT_FUNCTION,0,procdef));
+        list.concat(Tai_symbol.Createname_hidden(labelname,AT_FUNCTION,0,procdef));
 
 
       { the wrapper might need aktlocaldata for the additional data to
       { the wrapper might need aktlocaldata for the additional data to
         load the constant }
         load the constant }
@@ -257,7 +255,7 @@ implementation
 
 
 
 
 
 
-  procedure create_hlcodegen;
+  procedure create_hlcodegen_cpu;
     begin
     begin
       if GenerateThumbCode then
       if GenerateThumbCode then
         hlcg:=tthumbhlcgcpu.create
         hlcg:=tthumbhlcgcpu.create
@@ -268,4 +266,5 @@ implementation
 
 
 begin
 begin
   chlcgobj:=tbasehlcgarm;
   chlcgobj:=tbasehlcgarm;
+  create_hlcodegen:=@create_hlcodegen_cpu;
 end.
 end.

+ 24 - 22
compiler/arm/narmadd.pas

@@ -161,7 +161,7 @@ interface
 
 
     function tarmaddnode.use_fma : boolean;
     function tarmaddnode.use_fma : boolean;
       begin
       begin
-       Result:=current_settings.fputype in [fpu_vfpv4];
+       Result:=FPUARM_HAS_FMA in fpu_capabilities[current_settings.fputype];
       end;
       end;
 
 
 
 
@@ -205,10 +205,10 @@ interface
                  location.register,left.location.register,right.location.register),
                  location.register,left.location.register,right.location.register),
                  cgsize2fpuoppostfix[def_cgsize(resultdef)]));
                  cgsize2fpuoppostfix[def_cgsize(resultdef)]));
             end;
             end;
-          fpu_vfpv2,
-          fpu_vfpv3,
-          fpu_vfpv4,
-          fpu_vfpv3_d16:
+          fpu_soft:
+            { this case should be handled already by pass1 }
+            internalerror(200308252);
+          else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
             begin
             begin
               { force mmreg as location, left right doesn't matter
               { force mmreg as location, left right doesn't matter
                 as both will be in a fpureg }
                 as both will be in a fpureg }
@@ -238,8 +238,9 @@ interface
 
 
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(op,
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(op,
                  location.register,left.location.register,right.location.register),pf));
                  location.register,left.location.register,right.location.register),pf));
-            end;
-          fpu_fpv4_s16:
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+            end
+          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
             begin
             begin
               { force mmreg as location, left right doesn't matter
               { force mmreg as location, left right doesn't matter
                 as both will be in a fpureg }
                 as both will be in a fpureg }
@@ -263,10 +264,8 @@ interface
               end;
               end;
 
 
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(op, location.register,left.location.register,right.location.register), PF_F32));
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(op, location.register,left.location.register,right.location.register), PF_F32));
-            end;
-          fpu_soft:
-            { this case should be handled already by pass1 }
-            internalerror(200308252);
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+            end
           else
           else
             internalerror(200308251);
             internalerror(200308251);
         end;
         end;
@@ -305,10 +304,7 @@ interface
                    left.location.register,right.location.register),
                    left.location.register,right.location.register),
                    cgsize2fpuoppostfix[def_cgsize(resultdef)]));
                    cgsize2fpuoppostfix[def_cgsize(resultdef)]));
             end;
             end;
-          fpu_vfpv2,
-          fpu_vfpv3,
-          fpu_vfpv4,
-          fpu_vfpv3_d16:
+          else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
             begin
             begin
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
@@ -325,11 +321,12 @@ interface
 
 
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(op,
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(op,
                 left.location.register,right.location.register), pf));
                 left.location.register,right.location.register), pf));
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
               cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
               cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VMRS,NR_APSR_nzcv,NR_FPSCR));
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VMRS,NR_APSR_nzcv,NR_FPSCR));
               location.resflags:=GetFpuResFlags;
               location.resflags:=GetFpuResFlags;
-            end;
-          fpu_fpv4_s16:
+            end
+          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
             begin
             begin
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
@@ -341,10 +338,11 @@ interface
 
 
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(op,
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(op,
                 left.location.register,right.location.register),PF_F32));
                 left.location.register,right.location.register),PF_F32));
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
               cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
               cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
               current_asmdata.CurrAsmList.Concat(taicpu.op_reg_reg(A_VMRS, NR_APSR_nzcv, NR_FPSCR));
               current_asmdata.CurrAsmList.Concat(taicpu.op_reg_reg(A_VMRS, NR_APSR_nzcv, NR_FPSCR));
-            end;
-          fpu_soft:
+            end
+          else
             { this case should be handled already by pass1 }
             { this case should be handled already by pass1 }
             internalerror(2009112404);
             internalerror(2009112404);
         end;
         end;
@@ -517,6 +515,8 @@ interface
                         cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                         cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                         nodetype:=oldnodetype;
                         nodetype:=oldnodetype;
                      end;
                      end;
+                   else
+                     ;
                 end;
                 end;
                 cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                 cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                 current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register64.reglo,right.location.register64.reglo));
                 current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register64.reglo,right.location.register64.reglo));
@@ -586,7 +586,7 @@ interface
         result := nil;
         result := nil;
         notnode := false;
         notnode := false;
 
 
-        if current_settings.fputype = fpu_fpv4_s16 then
+        if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
           begin
           begin
             case tfloatdef(left.resultdef).floattype of
             case tfloatdef(left.resultdef).floattype of
               s32real:
               s32real:
@@ -634,7 +634,7 @@ interface
                   end;
                   end;
 
 
                   if nodetype in [ltn,lten,gtn,gten,equaln,unequaln] then
                   if nodetype in [ltn,lten,gtn,gten,equaln,unequaln] then
-                    resultdef:=pasbool8type;
+                    resultdef:=pasbool1type;
                   result:=ctypeconvnode.create_internal(ccallnode.createintern(procname,ccallparanode.create(
                   result:=ctypeconvnode.create_internal(ccallnode.createintern(procname,ccallparanode.create(
                       ctypeconvnode.create_internal(right,fdef),
                       ctypeconvnode.create_internal(right,fdef),
                       ccallparanode.create(
                       ccallparanode.create(
@@ -647,6 +647,8 @@ interface
                   if notnode then
                   if notnode then
                     result:=cnotnode.create(result);
                     result:=cnotnode.create(result);
                 end;
                 end;
+              else
+                internalerror(2019050933);
             end;
             end;
           end
           end
         else
         else
@@ -720,7 +722,7 @@ interface
       begin
       begin
         result:=GenerateThumbCode or
         result:=GenerateThumbCode or
           not(CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) or
           not(CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) or
-          (cs_check_overflow in current_settings.localswitches);
+          needoverflowcheck;
       end;
       end;
 
 
 begin
 begin

+ 1 - 1
compiler/arm/narmcal.pas

@@ -83,7 +83,7 @@ implementation
          (target_info.abi<>abi_eabihf) and
          (target_info.abi<>abi_eabihf) and
          (procdefinition.proccalloption<>pocall_hardfloat) and
          (procdefinition.proccalloption<>pocall_hardfloat) and
          ((cs_fp_emulation in current_settings.moduleswitches) or
          ((cs_fp_emulation in current_settings.moduleswitches) or
-          (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16,fpu_fpv4_s16])) then
+          (current_settings.fputype in [fpu_vfp_first..fpu_vfp_last])) then
         begin
         begin
           { keep the fpu values in integer registers for now, the code
           { keep the fpu values in integer registers for now, the code
             generator will move them to memory or an mmregister when necessary
             generator will move them to memory or an mmregister when necessary

+ 10 - 14
compiler/arm/narmcnv.pas

@@ -78,7 +78,7 @@ implementation
 {$ifdef cpufpemu}
 {$ifdef cpufpemu}
           (current_settings.fputype=fpu_soft) or
           (current_settings.fputype=fpu_soft) or
 {$endif cpufpemu}
 {$endif cpufpemu}
-          (current_settings.fputype=fpu_fpv4_s16) then
+          (FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype]) then
           result:=inherited first_int_to_real
           result:=inherited first_int_to_real
         else
         else
           begin
           begin
@@ -117,11 +117,7 @@ implementation
               fpu_fpa10,
               fpu_fpa10,
               fpu_fpa11:
               fpu_fpa11:
                 expectloc:=LOC_FPUREGISTER;
                 expectloc:=LOC_FPUREGISTER;
-              fpu_vfpv2,
-              fpu_vfpv3,
-              fpu_vfpv4,
-              fpu_vfpv3_d16,
-              fpu_fpv4_s16:
+              fpu_vfp_first..fpu_vfp_last:
                 expectloc:=LOC_MMREGISTER;
                 expectloc:=LOC_MMREGISTER;
               else
               else
                 internalerror(2009112702);
                 internalerror(2009112702);
@@ -131,7 +127,7 @@ implementation
 
 
     function tarmtypeconvnode.first_real_to_real: tnode;
     function tarmtypeconvnode.first_real_to_real: tnode;
       begin
       begin
-        if (current_settings.fputype=fpu_fpv4_s16) then
+        if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
           begin
           begin
             case tfloatdef(left.resultdef).floattype of
             case tfloatdef(left.resultdef).floattype of
               s32real:
               s32real:
@@ -244,10 +240,7 @@ implementation
                   end;
                   end;
               end;
               end;
             end;
             end;
-          fpu_vfpv2,
-          fpu_vfpv3,
-          fpu_vfpv4,
-          fpu_vfpv3_d16:
+          else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
             begin
             begin
               location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
               location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
               signed:=left.location.size=OS_S32;
               signed:=left.location.size=OS_S32;
@@ -261,8 +254,8 @@ implementation
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,
                 location.register,left.location.register),
                 location.register,left.location.register),
                 signedprec2vfppf[signed,location.size]));
                 signedprec2vfppf[signed,location.size]));
-            end;
-          fpu_fpv4_s16:
+            end
+          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
             begin
             begin
               location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
               location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
               signed:=left.location.size=OS_S32;
               signed:=left.location.size=OS_S32;
@@ -277,7 +270,10 @@ implementation
                 current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,location.register,left.location.register), PF_F32S32))
                 current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,location.register,left.location.register), PF_F32S32))
               else
               else
                 current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,location.register,left.location.register), PF_F32U32));
                 current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,location.register,left.location.register), PF_F32U32));
-            end;
+            end
+          else
+            { should be handled in pass 1 }
+            internalerror(2019050934);
         end;
         end;
       end;
       end;
 
 

+ 93 - 64
compiler/arm/narmcon.pas

@@ -26,10 +26,11 @@ unit narmcon;
 interface
 interface
 
 
     uses
     uses
-      ncgcon,cpubase;
+      node,ncgcon,cpubase;
 
 
     type
     type
       tarmrealconstnode = class(tcgrealconstnode)
       tarmrealconstnode = class(tcgrealconstnode)
+        function pass_1 : tnode;override;
         procedure pass_generate_code;override;
         procedure pass_generate_code;override;
       end;
       end;
 
 
@@ -39,9 +40,10 @@ interface
       verbose,
       verbose,
       globtype,globals,
       globtype,globals,
       cpuinfo,
       cpuinfo,
-      aasmbase,aasmtai,aasmdata,symdef,
+      aasmbase,aasmtai,aasmdata,aasmcpu,
+      symdef,
       defutil,
       defutil,
-      cgbase,cgutils,
+      cgbase,cgutils,cgobj,
       procinfo,
       procinfo,
       ncon;
       ncon;
 
 
@@ -49,6 +51,17 @@ interface
                            TARMREALCONSTNODE
                            TARMREALCONSTNODE
 *****************************************************************************}
 *****************************************************************************}
 
 
+    function tarmrealconstnode.pass_1 : tnode;
+      begin
+        result:=nil;
+        if (FPUARM_HAS_VMOV_CONST in fpu_capabilities[current_settings.fputype]) and
+           IsVFPFloatImmediate(tfloatdef(resultdef).floattype,value_real) then
+           expectloc:=LOC_MMREGISTER
+         else
+           expectloc:=LOC_CREFERENCE;
+      end;
+
+
     procedure tarmrealconstnode.pass_generate_code;
     procedure tarmrealconstnode.pass_generate_code;
       { I suppose the parser/pass_1 must make sure the generated real  }
       { I suppose the parser/pass_1 must make sure the generated real  }
       { constants are actually supported by the target processor? (JM) }
       { constants are actually supported by the target processor? (JM) }
@@ -59,75 +72,91 @@ interface
          lastlabel : tasmlabel;
          lastlabel : tasmlabel;
          realait : tairealconsttype;
          realait : tairealconsttype;
          hiloswapped : boolean;
          hiloswapped : boolean;
+         pf : TOpPostfix;
 
 
       begin
       begin
-        location_reset_ref(location,LOC_CREFERENCE,def_cgsize(resultdef),4,[]);
-        lastlabel:=nil;
-        realait:=floattype2ait[tfloatdef(resultdef).floattype];
-        hiloswapped:=is_double_hilo_swapped;
-        { const already used ? }
-        if not assigned(lab_real) then
+        if (FPUARM_HAS_VMOV_CONST in fpu_capabilities[current_settings.fputype]) and
+          IsVFPFloatImmediate(tfloatdef(resultdef).floattype,value_real) then
           begin
           begin
-            current_asmdata.getjumplabel(lastlabel);
-            lab_real:=lastlabel;
-            current_procinfo.aktlocaldata.concat(Tai_label.Create(lastlabel));
-            location.reference.symboldata:=current_procinfo.aktlocaldata.last;
-            case realait of
-              aitrealconst_s32bit :
-                begin
-                  current_procinfo.aktlocaldata.concat(tai_realconst.create_s32real(ts32real(value_real)));
-                  { range checking? }
-                  if floating_point_range_check_error and
-                    (tai_realconst(current_procinfo.aktlocaldata.last).value.s32val=MathInf.Value) then
-                    Message(parser_e_range_check_error);
-                end;
-
-              aitrealconst_s64bit :
-                begin
-                  if hiloswapped then
-                    current_procinfo.aktlocaldata.concat(tai_realconst.create_s64real_hiloswapped(ts64real(value_real)))
-                  else
-                    current_procinfo.aktlocaldata.concat(tai_realconst.create_s64real(ts64real(value_real)));
-
-                  { range checking? }
-                  if floating_point_range_check_error and
-                    (tai_realconst(current_procinfo.aktlocaldata.last).value.s64val=MathInf.Value) then
-                    Message(parser_e_range_check_error);
-               end;
-
-              aitrealconst_s80bit :
-                begin
-                  current_procinfo.aktlocaldata.concat(tai_realconst.create_s80real(value_real,tfloatdef(resultdef).size));
-
-                  { range checking? }
-                  if floating_point_range_check_error and
-                    (tai_realconst(current_procinfo.aktlocaldata.last).value.s80val=MathInf.Value) then
-                    Message(parser_e_range_check_error);
-                end;
+            location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+            location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+            if tfloatdef(resultdef).floattype=s32real then
+              pf:=PF_F32
+            else
+              pf:=PF_F64;
+            current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_realconst(A_VMOV,
+               location.register,value_real),pf));
+          end
+        else
+          begin
+            location_reset_ref(location,LOC_CREFERENCE,def_cgsize(resultdef),4,[]);
+            lastlabel:=nil;
+            realait:=floattype2ait[tfloatdef(resultdef).floattype];
+            hiloswapped:=is_double_hilo_swapped;
+            { const already used ? }
+            if not assigned(lab_real) then
+              begin
+                current_asmdata.getjumplabel(lastlabel);
+                lab_real:=lastlabel;
+                current_procinfo.aktlocaldata.concat(Tai_label.Create(lastlabel));
+                location.reference.symboldata:=current_procinfo.aktlocaldata.last;
+                case realait of
+                  aitrealconst_s32bit :
+                    begin
+                      current_procinfo.aktlocaldata.concat(tai_realconst.create_s32real(ts32real(value_real)));
+                      { range checking? }
+                      if floating_point_range_check_error and
+                        (tai_realconst(current_procinfo.aktlocaldata.last).value.s32val=MathInf.Value) then
+                        Message(parser_e_range_check_error);
+                    end;
+
+                  aitrealconst_s64bit :
+                    begin
+                      if hiloswapped then
+                        current_procinfo.aktlocaldata.concat(tai_realconst.create_s64real_hiloswapped(ts64real(value_real)))
+                      else
+                        current_procinfo.aktlocaldata.concat(tai_realconst.create_s64real(ts64real(value_real)));
+
+                      { range checking? }
+                      if floating_point_range_check_error and
+                        (tai_realconst(current_procinfo.aktlocaldata.last).value.s64val=MathInf.Value) then
+                        Message(parser_e_range_check_error);
+                   end;
+
+                  aitrealconst_s80bit :
+                    begin
+                      current_procinfo.aktlocaldata.concat(tai_realconst.create_s80real(value_real,tfloatdef(resultdef).size));
+
+                      { range checking? }
+                      if floating_point_range_check_error and
+                        (tai_realconst(current_procinfo.aktlocaldata.last).value.s80val=MathInf.Value) then
+                        Message(parser_e_range_check_error);
+                    end;
 {$ifdef cpufloat128}
 {$ifdef cpufloat128}
-              aitrealconst_s128bit :
-                begin
-                  current_procinfo.aktlocaldata.concat(tai_realconst.create_s128real(value_real));
-
-                  { range checking? }
-                  if floating_point_range_check_error and
-                    (tai_realconst(current_procinfo.aktlocaldata.last).value.s128val=MathInf.Value) then
-                    Message(parser_e_range_check_error);
-                end;
+                  aitrealconst_s128bit :
+                    begin
+                      current_procinfo.aktlocaldata.concat(tai_realconst.create_s128real(value_real));
+
+                      { range checking? }
+                      if floating_point_range_check_error and
+                        (tai_realconst(current_procinfo.aktlocaldata.last).value.s128val=MathInf.Value) then
+                        Message(parser_e_range_check_error);
+                    end;
 {$endif cpufloat128}
 {$endif cpufloat128}
 
 
-              { the round is necessary for native compilers where comp isn't a float }
-              aitrealconst_s64comp :
-                if (value_real>9223372036854775807.0) or (value_real<-9223372036854775808.0) then
-                  message(parser_e_range_check_error)
+                  { the round is necessary for native compilers where comp isn't a float }
+                  aitrealconst_s64comp :
+                    if (value_real>9223372036854775807.0) or (value_real<-9223372036854775808.0) then
+                      message(parser_e_range_check_error)
+                    else
+                      current_procinfo.aktlocaldata.concat(tai_realconst.create_s64compreal(round(value_real)));
                 else
                 else
-                  current_procinfo.aktlocaldata.concat(tai_realconst.create_s64compreal(round(value_real)));
-            else
-              internalerror(2005092401);
-            end;
+                  internalerror(2005092401);
+                end;
+              end;
+            location.reference.symbol:=lab_real;
+            location.reference.base:=NR_R15;
           end;
           end;
-        location.reference.symbol:=lab_real;
-        location.reference.base:=NR_R15;
       end;
       end;
 
 
 begin
 begin

+ 51 - 60
compiler/arm/narminl.pas

@@ -86,11 +86,7 @@ implementation
                  location.loc := LOC_FPUREGISTER;
                  location.loc := LOC_FPUREGISTER;
                end;
                end;
             end;
             end;
-          fpu_vfpv2,
-          fpu_vfpv3,
-          fpu_vfpv4,
-          fpu_vfpv3_d16,
-          fpu_fpv4_s16:
+          fpu_vfp_first..fpu_vfp_last:
             begin
             begin
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               location_copy(location,left.location);
               location_copy(location,left.location);
@@ -127,18 +123,15 @@ implementation
               fpu_fpa10,
               fpu_fpa10,
               fpu_fpa11:
               fpu_fpa11:
                 expectloc:=LOC_FPUREGISTER;
                 expectloc:=LOC_FPUREGISTER;
-              fpu_vfpv2,
-              fpu_vfpv3,
-              fpu_vfpv4,
-              fpu_vfpv3_d16:
-                expectloc:=LOC_MMREGISTER;
-              fpu_fpv4_s16:
+              else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
+                expectloc:=LOC_MMREGISTER
+              else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
                 begin
                 begin
                   if tfloatdef(left.resultdef).floattype=s32real then
                   if tfloatdef(left.resultdef).floattype=s32real then
                     expectloc:=LOC_MMREGISTER
                     expectloc:=LOC_MMREGISTER
                   else
                   else
                     exit(inherited first_abs_real);
                     exit(inherited first_abs_real);
-                end;
+                end
               else
               else
                 internalerror(2009112401);
                 internalerror(2009112401);
             end;
             end;
@@ -158,18 +151,15 @@ implementation
               fpu_fpa10,
               fpu_fpa10,
               fpu_fpa11:
               fpu_fpa11:
                 expectloc:=LOC_FPUREGISTER;
                 expectloc:=LOC_FPUREGISTER;
-              fpu_vfpv2,
-              fpu_vfpv3,
-              fpu_vfpv4,
-              fpu_vfpv3_d16:
-                expectloc:=LOC_MMREGISTER;
-              fpu_fpv4_s16:
+              else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
+                expectloc:=LOC_MMREGISTER
+              else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
                 begin
                 begin
                   if tfloatdef(left.resultdef).floattype=s32real then
                   if tfloatdef(left.resultdef).floattype=s32real then
                     expectloc:=LOC_MMREGISTER
                     expectloc:=LOC_MMREGISTER
                   else
                   else
                     exit(inherited first_sqr_real);
                     exit(inherited first_sqr_real);
-                end;
+                end
               else
               else
                 internalerror(2009112402);
                 internalerror(2009112402);
             end;
             end;
@@ -189,18 +179,15 @@ implementation
               fpu_fpa10,
               fpu_fpa10,
               fpu_fpa11:
               fpu_fpa11:
                 expectloc:=LOC_FPUREGISTER;
                 expectloc:=LOC_FPUREGISTER;
-              fpu_vfpv2,
-              fpu_vfpv3,
-              fpu_vfpv4,
-              fpu_vfpv3_d16:
-                expectloc:=LOC_MMREGISTER;
-              fpu_fpv4_s16:
+              else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
+                expectloc:=LOC_MMREGISTER
+              else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
                 begin
                 begin
                   if tfloatdef(left.resultdef).floattype=s32real then
                   if tfloatdef(left.resultdef).floattype=s32real then
                     expectloc:=LOC_MMREGISTER
                     expectloc:=LOC_MMREGISTER
                   else
                   else
                     exit(inherited first_sqrt_real);
                     exit(inherited first_sqrt_real);
-                end;
+                end
               else
               else
                 internalerror(2009112403);
                 internalerror(2009112403);
             end;
             end;
@@ -262,28 +249,29 @@ implementation
           fpu_fpa10,
           fpu_fpa10,
           fpu_fpa11:
           fpu_fpa11:
             current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_ABS,location.register,left.location.register),get_fpu_postfix(resultdef)));
             current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_ABS,location.register,left.location.register),get_fpu_postfix(resultdef)));
-          fpu_vfpv2,
-          fpu_vfpv3,
-          fpu_vfpv4,
-          fpu_vfpv3_d16:
+          fpu_soft:
+            begin
+              if singleprec then
+                cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,OS_32,tcgint($7fffffff),location.register)
+              else
+                cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,OS_32,tcgint($7fffffff),location.registerhi);
+            end
+          else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
             begin
             begin
               if singleprec then
               if singleprec then
                 pf:=PF_F32
                 pf:=PF_F32
               else
               else
                 pf:=PF_F64;
                 pf:=PF_F64;
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register),pf));
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register),pf));
-            end;
-          fpu_fpv4_s16:
-            current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register), PF_F32));
-          fpu_soft:
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+            end
+          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
             begin
             begin
-              if singleprec then
-                cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,OS_32,tcgint($7fffffff),location.register)
-              else
-                cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,OS_32,tcgint($7fffffff),location.registerhi);
+              current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register), PF_F32));
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
             end
             end
-        else
-          internalerror(2009111402);
+          else
+            internalerror(2009111402);
         end;
         end;
       end;
       end;
 
 
@@ -299,21 +287,22 @@ implementation
           fpu_fpa10,
           fpu_fpa10,
           fpu_fpa11:
           fpu_fpa11:
             current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(A_MUF,location.register,left.location.register,left.location.register),get_fpu_postfix(resultdef)));
             current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(A_MUF,location.register,left.location.register,left.location.register),get_fpu_postfix(resultdef)));
-          fpu_vfpv2,
-          fpu_vfpv3,
-          fpu_vfpv4,
-          fpu_vfpv3_d16:
+          else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
             begin
             begin
               if singleprec then
               if singleprec then
                 pf:=PF_F32
                 pf:=PF_F32
               else
               else
                 pf:=PF_F64;
                 pf:=PF_F64;
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register),pf));
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register),pf));
-            end;
-          fpu_fpv4_s16:
-            current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register), PF_F32));
-        else
-          internalerror(2009111403);
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+            end
+          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+            begin
+              current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register), PF_F32));
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+            end
+          else
+            internalerror(2009111403);
         end;
         end;
       end;
       end;
 
 
@@ -329,21 +318,22 @@ implementation
           fpu_fpa10,
           fpu_fpa10,
           fpu_fpa11:
           fpu_fpa11:
             current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_SQT,location.register,left.location.register),get_fpu_postfix(resultdef)));
             current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_SQT,location.register,left.location.register),get_fpu_postfix(resultdef)));
-          fpu_vfpv2,
-          fpu_vfpv3,
-          fpu_vfpv4,
-          fpu_vfpv3_d16:
+          else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
             begin
             begin
               if singleprec then
               if singleprec then
                 pf:=PF_F32
                 pf:=PF_F32
               else
               else
                 pf:=PF_F64;
                 pf:=PF_F64;
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register),pf));
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register),pf));
-            end;
-          fpu_fpv4_s16:
-            current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register), PF_F32));
-        else
-          internalerror(2009111402);
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+            end
+          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+            begin
+              current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register), PF_F32));
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+            end
+          else
+            internalerror(2009111402);
         end;
         end;
       end;
       end;
 
 
@@ -397,7 +387,7 @@ implementation
                 begin
                 begin
                   r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
                   r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
                   cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
                   cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
-                  reference_reset_base(ref,r,0,left.location.reference.alignment,location.reference.volatility);
+                  reference_reset_base(ref,r,0,location.reference.temppos,left.location.reference.alignment,location.reference.volatility);
                   { since the address might be nil we can't use ldr for older cpus }
                   { since the address might be nil we can't use ldr for older cpus }
                   current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PLD,ref));
                   current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PLD,ref));
                 end;
                 end;
@@ -457,7 +447,7 @@ implementation
         negproduct : boolean;
         negproduct : boolean;
         oppostfix : TOpPostfix;
         oppostfix : TOpPostfix;
       begin
       begin
-         if current_settings.fputype in [fpu_vfpv4] then
+         if FPUARM_HAS_FMA in fpu_capabilities[current_settings.fputype] then
            begin
            begin
              negop3:=false;
              negop3:=false;
              negproduct:=false;
              negproduct:=false;
@@ -515,6 +505,7 @@ implementation
                oppostfix:=PF_F32;
                oppostfix:=PF_F32;
              current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(op[negproduct,negop3],
              current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(op[negproduct,negop3],
                location.register,paraarray[1].location.register,paraarray[2].location.register),oppostfix));
                location.register,paraarray[1].location.register,paraarray[2].location.register),oppostfix));
+             cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
            end
            end
          else
          else
            internalerror(2014032301);
            internalerror(2014032301);

+ 172 - 0
compiler/arm/narmld.pas

@@ -0,0 +1,172 @@
+{
+    Copyright (c) 1998-2018 by Florian Klaempfl
+
+    Generate arm assembler for load nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit narmld;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      globtype,
+      symsym,
+      node,ncgld,pass_1,aasmbase;
+
+    type
+      tarmloadnode = class(tcgloadnode) { ARM load node; assigned to cloadnode in this unit's initialization section }
+         procedure generate_threadvar_access(gvs : tstaticvarsym); override; { threadvar access for arm-linux section threadvars; other cases go to the inherited method }
+      end;
+
+
+implementation
+
+    uses
+      globals,verbose,
+      cgbase,cgobj,cgutils,
+      aasmdata,aasmcpu,
+      systems,
+      symcpu,symdef,
+      nld,
+      cpubase,
+      parabase,
+      procinfo;
+
+{*****************************************************************************
+                            TARMLOADNODE
+*****************************************************************************}
+
+    procedure tarmloadnode.generate_threadvar_access(gvs: tstaticvarsym); { emits the address computation for threadvar gvs into location.reference }
+      var
+        href: treference; { scratch reference carrying the TLS symbol and its relocation kind }
+        hregister : tregister; { scratch register receiving the loaded address/offset }
+        handled: boolean; { set by each TLS-model branch below once it has filled in location.reference }
+        l : TAsmLabel; { label emitted right after the load; also stored in href.relsymbol }
+      begin
+        handled:=false; { unless a branch below takes over, the inherited implementation is used }
+        if tf_section_threadvars in target_info.flags then
+          begin
+            if target_info.system in [system_arm_linux] then { only arm-linux is handled in this override }
+              begin
+                if not(pi_uses_threadvar in current_procinfo.flags) then { the current procedure must already be flagged as using threadvars }
+                  internalerror(2012012101);
+                case current_settings.tlsmodel of { one code sequence per TLS access model }
+                  tlsm_global_dynamic:
+                    begin
+{$ifdef use_tls_dialect_gnu}
+                      current_asmdata.getjumplabel(l);
+                      reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_TLS),-8,sizeof(AInt),[]); { offset -8: presumably compensates for the ARM PC read-ahead - TODO confirm }
+                      href.refaddr:=addr_tlsgd;
+                      href.relsymbol:=l;
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,hregister);
+                      cg.a_label(current_asmdata.CurrAsmList,l); { l is placed directly after the load that references it }
+                      cg.getcpuregister(current_asmdata.CurrAsmList,NR_R0);
+                      cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,OS_ADDR,hregister,NR_PC,NR_R0); { presumably R0 := hregister + PC, the argument of the call below - verify operand order }
+                      cg.g_call(current_asmdata.CurrAsmList,'__tls_get_addr');
+                      cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_R0);
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,NR_R0,hregister); { move the value left in R0 into a freshly allocated register }
+                      reference_reset(location.reference,location.reference.alignment,location.reference.volatility); { clear the reference but keep its alignment and volatility }
+                      location.reference.base:=hregister;
+{$else use_tls_dialect_gnu}
+                      { On arm, we use the gnu2 tls dialect. It has the advantage that it can be relaxed (optimized) by the linker,
+                        this is not possible with the gnu tls dialect.
+
+                        gnu2 is proposed and documented in
+                          Glauber de Oliveira Costa, Alexandre Oliva: Speeding Up Thread-Local Storage Access in Dynamic Libraries in the ARM platform, 2006.
+                          Link: https://www.fsfla.org/~lxoliva/writeups/TLS/paper-lk2006.pdf
+                      }
+                      current_asmdata.getjumplabel(l);
+                      reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_TLS),0,sizeof(AInt),[]);
+                      href.refaddr:=addr_tlsdesc;
+                      href.relsymbol:=l;
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList); { NOTE(review): this register is not referenced before hregister is reassigned further down }
+                      cg.getcpuregister(current_asmdata.CurrAsmList,NR_R0);
+                      cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,NR_R0); { the addr_tlsdesc reference is loaded straight into R0 }
+                      cg.a_label(current_asmdata.CurrAsmList,l);
+
+                      { we have to go the ugly way so we can set addr_tlscall }
+                      cg.allocallcpuregisters(current_asmdata.CurrAsmList);
+                      cg.a_call_name(current_asmdata.CurrAsmList,gvs.mangledname,false);
+                      with taicpu(current_asmdata.CurrAsmList.Last) do { patch the instruction that a_call_name just appended }
+                        begin
+                          if opcode<>A_BL then { anything other than a BL as last instruction is treated as an internal error }
+                            Internalerror(2019092902);
+                          oper[0]^.ref^.refaddr:=addr_tlscall;
+                        end;
+                      cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
+
+                      cg.getcpuregister(current_asmdata.CurrAsmList,NR_R0); { NOTE(review): R0 is acquired and released back-to-back here, before the copy out of R0 below - confirm intent }
+                      cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_R0);
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,NR_R0,hregister); { move the value left in R0 into a freshly allocated register }
+                      reference_reset(location.reference,location.reference.alignment,location.reference.volatility); { clear the reference but keep its alignment and volatility }
+                      location.reference.base:=current_procinfo.tlsoffset; { resulting reference: base = tlsoffset, index = value taken from R0 }
+                      include(current_procinfo.flags,pi_needs_tls);
+                      location.reference.index:=hregister;
+{$endif use_tls_dialect_gnu}
+                      handled:=true;
+                    end;
+                  tlsm_initial_exec:
+                    begin
+                      current_asmdata.getjumplabel(l);
+                      reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_TLS),-8,sizeof(AInt),[]); { offset -8: presumably compensates for the ARM PC read-ahead - TODO confirm }
+                      href.refaddr:=addr_tpoff;
+                      href.relsymbol:=l;
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,hregister);
+                      cg.a_label(current_asmdata.CurrAsmList,l);
+                      reference_reset(href,0,[]); { href is reused as a plain base+index reference: base = PC, index = the register loaded above }
+                      href.base:=NR_PC;
+                      href.index:=hregister;
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,href,hregister); { load through the PC-relative reference into a new register }
+                      location.reference.base:=current_procinfo.tlsoffset; { resulting reference: base = tlsoffset, index = loaded value }
+                      include(current_procinfo.flags,pi_needs_tls);
+                      location.reference.index:=hregister;
+                      handled:=true;
+                    end;
+                  tlsm_local_exec:
+                    begin
+                      reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_TLS),0,sizeof(AInt),[]); { no label and no relsymbol in this model }
+                      href.refaddr:=addr_tpoff;
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,hregister);
+                      reference_reset(href,0,[]); { NOTE(review): href is not used again in this branch, so this reset appears to have no effect }
+                      location.reference.base:=current_procinfo.tlsoffset; { resulting reference: base = tlsoffset, index = register loaded above }
+                      include(current_procinfo.flags,pi_needs_tls);
+                      location.reference.index:=hregister;
+                      handled:=true;
+                    end;
+                  else
+                    Internalerror(2019092802); { any other TLS model is rejected }
+                end;
+              end;
+          end;
+
+        if not handled then { no branch above applied: defer to the inherited implementation }
+          inherited;
+      end;
+
+
+begin { unit initialization }
+   cloadnode:=tarmloadnode; { assign the ARM-specific load node class to cloadnode }
+end.

+ 40 - 30
compiler/arm/narmmat.pas

@@ -51,7 +51,7 @@ interface
 implementation
 implementation
 
 
     uses
     uses
-      globtype,
+      globtype,compinnr,
       cutils,verbose,globals,constexp,
       cutils,verbose,globals,constexp,
       aasmbase,aasmcpu,aasmtai,aasmdata,
       aasmbase,aasmcpu,aasmtai,aasmdata,
       defutil,
       defutil,
@@ -75,7 +75,7 @@ implementation
         if not(cs_check_overflow in current_settings.localswitches) and
         if not(cs_check_overflow in current_settings.localswitches) and
            (right.nodetype=ordconstn) and
            (right.nodetype=ordconstn) and
            (nodetype=divn) and
            (nodetype=divn) and
-           not(is_64bitint(resultdef)) and
+           not(is_64bit(resultdef)) and
            {Only the ARM and thumb2-isa support umull and smull, which are required for arbitrary division by const optimization}
            {Only the ARM and thumb2-isa support umull and smull, which are required for arbitrary division by const optimization}
            (GenerateArmCode or
            (GenerateArmCode or
             GenerateThumb2Code or
             GenerateThumb2Code or
@@ -87,11 +87,11 @@ implementation
           result:=nil
           result:=nil
         else if ((GenerateThumbCode or GenerateThumb2Code) and (CPUARM_HAS_THUMB_IDIV in cpu_capabilities[current_settings.cputype])) and
         else if ((GenerateThumbCode or GenerateThumb2Code) and (CPUARM_HAS_THUMB_IDIV in cpu_capabilities[current_settings.cputype])) and
           (nodetype=divn) and
           (nodetype=divn) and
-          not(is_64bitint(resultdef)) then
+          not(is_64bit(resultdef)) then
           result:=nil
           result:=nil
         else if ((GenerateThumbCode or GenerateThumb2Code) and (CPUARM_HAS_THUMB_IDIV in cpu_capabilities[current_settings.cputype])) and
         else if ((GenerateThumbCode or GenerateThumb2Code) and (CPUARM_HAS_THUMB_IDIV in cpu_capabilities[current_settings.cputype])) and
           (nodetype=modn) and
           (nodetype=modn) and
-          not(is_64bitint(resultdef)) then
+          not(is_64bit(resultdef)) then
           begin
           begin
             if (right.nodetype=ordconstn) and
             if (right.nodetype=ordconstn) and
               ispowerof2(tordconstnode(right).value,power) and
               ispowerof2(tordconstnode(right).value,power) and
@@ -164,7 +164,7 @@ implementation
                       cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,31,numerator,helper1);
                       cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,31,numerator,helper1);
                     if GenerateThumbCode then
                     if GenerateThumbCode then
                       begin
                       begin
-                        cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,32-power,helper1);
+                        cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,OS_INT,32-power,helper1);
                         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_ADD,helper2,numerator,helper1));
                         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_ADD,helper2,numerator,helper1));
                       end
                       end
                     else
                     else
@@ -179,9 +179,12 @@ implementation
                else
                else
                  cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_INT,power,numerator,resultreg)
                  cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_INT,power,numerator,resultreg)
              end
              end
-           else {Everything else is handled the generic code}
+           else if CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype] then
+             {Everything else is handled by the generic code}
              cg.g_div_const_reg_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),
              cg.g_div_const_reg_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),
-               tordconstnode(right).value.svalue,numerator,resultreg);
+               tordconstnode(right).value.svalue,numerator,resultreg)
+           else
+             internalerror(2019012601);
          end;
          end;
 
 
 {
 {
@@ -286,8 +289,7 @@ implementation
                 resultreg:=cg.getintregister(current_asmdata.CurrAsmList,size);
                 resultreg:=cg.getintregister(current_asmdata.CurrAsmList,size);
               end;
               end;
 
 
-            if (right.nodetype=ordconstn) and
-               (CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
+            if (right.nodetype=ordconstn) then
               begin
               begin
                 if nodetype=divn then
                 if nodetype=divn then
                   genOrdConstNodeDiv
                   genOrdConstNodeDiv
@@ -310,6 +312,8 @@ implementation
 *****************************************************************************}
 *****************************************************************************}
 
 
     procedure tarmnotnode.second_boolean;
     procedure tarmnotnode.second_boolean;
+      var
+        tmpreg : TRegister;
       begin
       begin
         { if the location is LOC_JUMP, we do the secondpass after the
         { if the location is LOC_JUMP, we do the secondpass after the
           labels are allocated
           labels are allocated
@@ -328,7 +332,14 @@ implementation
                 begin
                 begin
                   hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
                   hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
                   cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                   cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
-                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,left.location.register,0));
+                  if is_64bit(resultdef) then
+                    begin
+                      tmpreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_INT);
+                      { OR low and high parts together }
+                      current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ORR,tmpreg,left.location.register64.reglo,left.location.register64.reghi),PF_S));
+                    end
+                  else
+                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,left.location.register,0));
                   location_reset(location,LOC_FLAGS,OS_NO);
                   location_reset(location,LOC_FLAGS,OS_NO);
                   location.resflags:=F_EQ;
                   location.resflags:=F_EQ;
                 end;
                 end;
@@ -356,7 +367,7 @@ implementation
             exit;
             exit;
           end;
           end;
 
 
-        if (current_settings.fputype<>fpu_fpv4_s16) or
+        if not(FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype]) or
           (tfloatdef(resultdef).floattype=s32real) then
           (tfloatdef(resultdef).floattype=s32real) then
           exit(inherited pass_1);
           exit(inherited pass_1);
 
 
@@ -407,10 +418,20 @@ implementation
                 location.register,left.location.register,0),
                 location.register,left.location.register,0),
                 cgsize2fpuoppostfix[def_cgsize(resultdef)]));
                 cgsize2fpuoppostfix[def_cgsize(resultdef)]));
             end;
             end;
-          fpu_vfpv2,
-          fpu_vfpv3,
-          fpu_vfpv4,
-          fpu_vfpv3_d16:
+          fpu_soft:
+            begin
+              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
+              location:=left.location;
+              case location.size of
+                OS_32:
+                  cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_XOR,OS_32,tcgint($80000000),location.register);
+                OS_64:
+                  cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_XOR,OS_32,tcgint($80000000),location.registerhi);
+              else
+                internalerror(2014033101);
+              end;
+            end
+          else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[init_settings.fputype] then
             begin
             begin
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               location:=left.location;
               location:=left.location;
@@ -424,8 +445,9 @@ implementation
 
 
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VNEG,
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VNEG,
                 location.register,left.location.register), pf));
                 location.register,left.location.register), pf));
-            end;
-          fpu_fpv4_s16:
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+            end
+          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[init_settings.fputype] then
             begin
             begin
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               location:=left.location;
               location:=left.location;
@@ -433,19 +455,7 @@ implementation
                 location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
                 location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VNEG,
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VNEG,
                 location.register,left.location.register), PF_F32));
                 location.register,left.location.register), PF_F32));
-            end;
-          fpu_soft:
-            begin
-              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
-              location:=left.location;
-              case location.size of
-                OS_32:
-                  cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_XOR,OS_32,tcgint($80000000),location.register);
-                OS_64:
-                  cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_XOR,OS_32,tcgint($80000000),location.registerhi);
-              else
-                internalerror(2014033101);
-              end;
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
             end
             end
           else
           else
             internalerror(2009112602);
             internalerror(2009112602);

+ 40 - 36
compiler/arm/narmset.pas

@@ -41,9 +41,9 @@ interface
        end;
        end;
 
 
       tarmcasenode = class(tcgcasenode)
       tarmcasenode = class(tcgcasenode)
-         procedure optimizevalues(var max_linear_list:aint;var max_dist:aword);override;
+         procedure optimizevalues(var max_linear_list:int64;var max_dist:qword);override;
          function  has_jumptable : boolean;override;
          function  has_jumptable : boolean;override;
-         procedure genjumptable(hp : pcaselabel;min_,max_ : aint);override;
+         procedure genjumptable(hp : pcaselabel;min_,max_ : int64);override;
          procedure genlinearlist(hp : pcaselabel);override;
          procedure genlinearlist(hp : pcaselabel);override;
          procedure genjmptreeentry(p : pcaselabel;parentvalue : TConstExprInt);override;
          procedure genjmptreeentry(p : pcaselabel;parentvalue : TConstExprInt);override;
       end;
       end;
@@ -136,7 +136,7 @@ implementation
                             TARMCASENODE
                             TARMCASENODE
 *****************************************************************************}
 *****************************************************************************}
 
 
-    procedure tarmcasenode.optimizevalues(var max_linear_list:aint;var max_dist:aword);
+    procedure tarmcasenode.optimizevalues(var max_linear_list:int64;var max_dist:qword);
       begin
       begin
         inc(max_linear_list,2)
         inc(max_linear_list,2)
       end;
       end;
@@ -148,7 +148,7 @@ implementation
       end;
       end;
 
 
 
 
-    procedure tarmcasenode.genjumptable(hp : pcaselabel;min_,max_ : aint);
+    procedure tarmcasenode.genjumptable(hp : pcaselabel;min_,max_ : int64);
       var
       var
         last : TConstExprInt;
         last : TConstExprInt;
         tmpreg,
         tmpreg,
@@ -161,22 +161,30 @@ implementation
 
 
         procedure genitem(list:TAsmList;t : pcaselabel);
         procedure genitem(list:TAsmList;t : pcaselabel);
           var
           var
-            i : aint;
+            i : int64;
           begin
           begin
             if assigned(t^.less) then
             if assigned(t^.less) then
               genitem(list,t^.less);
               genitem(list,t^.less);
             { fill possible hole }
             { fill possible hole }
-            for i:=last.svalue+1 to t^._low.svalue-1 do
-              if cs_create_pic in current_settings.moduleswitches then
-                list.concat(Tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,elselabel,picoffset))
-              else
-                list.concat(Tai_const.Create_sym(elselabel));
-            for i:=t^._low.svalue to t^._high.svalue do
-              if cs_create_pic in current_settings.moduleswitches then
-                list.concat(Tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,blocklabel(t^.blockid),picoffset))
-              else
-                list.concat(Tai_const.Create_sym(blocklabel(t^.blockid)));
-            last:=t^._high.svalue;
+            i:=last+1;
+            while i<=t^._low-1 do
+              begin
+                if cs_create_pic in current_settings.moduleswitches then
+                  list.concat(Tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,elselabel,picoffset))
+                else
+                  list.concat(Tai_const.Create_sym(elselabel));
+                i:=i+1;
+              end;
+            i:=t^._low;
+            while i<=t^._high do
+              begin
+                if cs_create_pic in current_settings.moduleswitches then
+                  list.concat(Tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,blocklabel(t^.blockid),picoffset))
+                else
+                  list.concat(Tai_const.Create_sym(blocklabel(t^.blockid)));
+                i:=i+1;
+              end;
+            last:=t^._high;
             if assigned(t^.greater) then
             if assigned(t^.greater) then
               genitem(list,t^.greater);
               genitem(list,t^.greater);
           end;
           end;
@@ -399,7 +407,6 @@ implementation
       procedure tarmcasenode.genjmptreeentry(p : pcaselabel;parentvalue : TConstExprInt);
       procedure tarmcasenode.genjmptreeentry(p : pcaselabel;parentvalue : TConstExprInt);
         var
         var
           lesslabel,greaterlabel : tasmlabel;
           lesslabel,greaterlabel : tasmlabel;
-          less,greater : pcaselabel;
           cond_gt: TResFlags;
           cond_gt: TResFlags;
           cmplow : Boolean;
           cmplow : Boolean;
         begin
         begin
@@ -424,25 +431,22 @@ implementation
           { no range label: }
           { no range label: }
           if p^._low=p^._high then
           if p^._low=p^._high then
             begin
             begin
-               if greaterlabel=lesslabel then
-                 begin
-                   if p^._low-1<>parentvalue then
-                     hlcg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,OC_NE,p^._low,hregister,lesslabel);
-                 end
-               else
-                 begin
-                   cmplow:=p^._low-1<>parentvalue;
-                   if cmplow then
-                     hlcg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,jmp_lt,p^._low,hregister,lesslabel);
-                   if p^._high+1<>parentvalue then
-                     begin
-                       if cmplow then
-                         hlcg.a_jmp_flags(current_asmdata.CurrAsmList,cond_gt,greaterlabel)
-                       else
-                         hlcg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,jmp_gt,p^._low,hregister,greaterlabel);
-                     end;
-                 end;
-               hlcg.a_jmp_always(current_asmdata.CurrAsmList,blocklabel(p^.blockid));
+              if greaterlabel=lesslabel then
+                hlcg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,OC_NE,p^._low,hregister,lesslabel)
+              else
+                begin
+                  cmplow:=p^._low-1<>parentvalue;
+                  if cmplow then
+                    hlcg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,jmp_lt,p^._low,hregister,lesslabel);
+                  if p^._high+1<>parentvalue then
+                    begin
+                      if cmplow then
+                        hlcg.a_jmp_flags(current_asmdata.CurrAsmList,cond_gt,greaterlabel)
+                      else
+                        hlcg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,jmp_gt,p^._low,hregister,greaterlabel);
+                    end;
+                end;
+              hlcg.a_jmp_always(current_asmdata.CurrAsmList,blocklabel(p^.blockid));
             end
             end
           else
           else
             begin
             begin

+ 327 - 0
compiler/arm/narmutil.pas

@@ -0,0 +1,327 @@
+{
+    Copyright (c) 2019 by Florian Klämpfl
+
+    ARM version of some node tree helper routines
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit narmutil;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    cclasses,ngenutil;
+
+  type
+    { ARM-specific descendant of tnodeutils; both overrides are implemented
+      further down in this unit }
+    tarmnodeutils = class(tnodeutils)
+      { calls the inherited version and then, for arm-linux with an EABI/EABIHF
+        abi, appends tai_eabi_attribute entries to the al_start assembler list }
+      class procedure InsertObjectInfo; override;
+      { when the target has tf_init_final_units_by_calls set, emits code that
+        calls the init/finalize routines of all entries; the inherited version
+        is called in either case }
+      class procedure insert_init_final_table(entries: tfplist); override;
+    end;
+
+
+  implementation
+
+    uses
+      verbose,
+      systems,
+      globals,
+      cpuinfo,cpubase,
+      cgbase,cgutils,
+      aasmbase,aasmdata,aasmtai,aasmcpu,
+      symdef;
+
+    { Tag numbers handed to tai_eabi_attribute.create by InsertObjectInfo.
+      NOTE(review): presumably these mirror the numbering of the ARM EABI build
+      attributes - confirm against the ABI addenda before changing any value.
+      Only a subset of these constants is referenced inside this unit. }
+    const
+      Tag_File = 1;
+      Tag_Section = 2;
+      Tag_Symbol = 3;
+      Tag_CPU_raw_name = 4;
+      Tag_CPU_name = 5;
+      Tag_CPU_arch = 6;
+      Tag_CPU_arch_profile = 7;
+      Tag_ARM_ISA_use = 8;
+      Tag_THUMB_ISA_use = 9;
+      Tag_FP_Arch = 10;
+      Tag_WMMX_arch = 11;
+      Tag_Advanced_SIMD_arch = 12;
+      Tag_PCS_config = 13;
+      Tag_ABI_PCS_R9_use = 14;
+      Tag_ABI_PCS_RW_data = 15;
+      Tag_ABI_PCS_RO_data = 16;
+      Tag_ABI_PCS_GOT_use = 17;
+      Tag_ABI_PCS_wchar_t = 18;
+      Tag_ABI_FP_rounding = 19;
+      Tag_ABI_FP_denormal = 20;
+      Tag_ABI_FP_exceptions = 21;
+      Tag_ABI_FP_user_exceptions = 22;
+      Tag_ABI_FP_number_model = 23;
+      Tag_ABI_align_needed = 24;
+      Tag_ABI_align8_preserved = 25;
+      Tag_ABI_enum_size = 26;
+      Tag_ABI_HardFP_use = 27;
+      Tag_ABI_VFP_args = 28;
+      Tag_ABI_WMMX_args = 29;
+      Tag_ABI_optimization_goals = 30;
+      Tag_ABI_FP_optimization_goals = 31;
+      { NOTE(review): identifier is misspelled ("compatiblity"); it is not
+        referenced anywhere in this unit, so a rename would be local }
+      Tag_compatiblity = 32;
+      Tag_CPU_unaligned_access = 34;
+      Tag_FP_HP_extension = 36;
+      Tag_ABI_FP_16bit_format = 38;
+      Tag_MPextension_use = 42;
+      Tag_DIV_use = 44;
+      Tag_nodefaults = 64;
+      Tag_also_compatible_with = 65;
+      { the next two entries are listed out of numeric order (67 before 66) }
+      Tag_conformance = 67;
+      Tag_T2EE_use = 66;
+      Tag_Virtualization_use = 68;
+
+    { Calls the inherited InsertObjectInfo and then, for arm-linux targets using
+      the EABI or EABIHF abi, appends EABI attributes to the al_start assembler
+      list describing:
+        - the selected cpu type (Tag_CPU_arch, Tag_CPU_arch_profile, Tag_CPU_name)
+        - the selected fpu type (Tag_FP_Arch, Tag_Advanced_SIMD_arch)
+        - ARM/Thumb ISA usage
+        - a fixed set of ABI properties (FP model, alignment)
+      An unknown cpu or fpu type raises an internal error.
+      Tag_ABI_VFP_args is emitted only for abi_eabihf, see the comment below. }
+    class procedure tarmnodeutils.InsertObjectInfo;
+      begin
+        inherited InsertObjectInfo;
+        { write eabi attributes to object file? }
+        if (target_info.system in [system_arm_linux]) and (target_info.abi in [abi_eabihf,abi_eabi]) then
+          begin
+            { cpu architecture, architecture profile and cpu name }
+            case current_settings.cputype of
+              cpu_armv3:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,''));
+                end;
+              cpu_armv4:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,1));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'4'));
+                end;
+              cpu_armv4t:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,2));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'4T'));
+                end;
+              cpu_armv5t:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,3));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'5T'));
+                end;
+              cpu_armv5te:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,4));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'5TE'));
+                end;
+              cpu_armv5tej:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,5));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'5TEJ'));
+                end;
+              cpu_armv6:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,6));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'6'));
+                end;
+              cpu_armv6k:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,9));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'6K'));
+                end;
+              cpu_armv6t2:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,8));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'T2'));
+                end;
+              cpu_armv6z:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,7));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'6Z'));
+                end;
+              cpu_armv6m:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,11));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'6-M'));
+                end;
+              cpu_armv7:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,10));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'7'));
+                end;
+              { the profile values below are the character codes of 'A', 'R' and 'M' }
+              cpu_armv7a:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,10));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,$41));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'7-A'));
+                end;
+              cpu_armv7r:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,10));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,$52));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'7-R'));
+                end;
+              cpu_armv7m:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,10));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,$4D));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'7-M'));
+                end;
+              cpu_armv7em:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,13));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,$4D));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'7E-M'));
+                end;
+              else
+                Internalerror(2019100602);
+            end;
+            { floating point architecture }
+            case current_settings.fputype of
+              fpu_soft,
+              fpu_libgcc:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,0));
+              fpu_vfpv2:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,2));
+              fpu_vfpv3,
+              fpu_neon_vfpv3:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,3));
+              fpu_vfpv3_d16:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,4));
+              fpu_fpv4_s16:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,6));
+              fpu_vfpv4,
+              fpu_neon_vfpv4:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,5));
+              else
+                Internalerror(2019100603);
+            end;
+            { advanced SIMD level, derived from the fpu capabilities; the FMA
+              check takes precedence over the NEON check }
+            if FPUARM_HAS_FMA in fpu_capabilities[current_settings.fputype] then
+              current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_Advanced_SIMD_arch,2))
+            else if FPUARM_HAS_NEON in fpu_capabilities[current_settings.fputype] then
+              current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_Advanced_SIMD_arch,1))
+            else
+              current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_Advanced_SIMD_arch,0));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ARM_ISA_use,1));
+            if CPUARM_HAS_THUMB2 in cpu_capabilities[current_settings.cputype] then
+              current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_THUMB_ISA_use,2))
+            else
+              current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_THUMB_ISA_use,1));
+            { Tag_ABI_VFP_args=1 declares that floating point arguments/results are
+              passed in VFP registers. Only the hard float abi does this; emitting
+              it for abi_eabi would mark base-ABI objects as hard float and make
+              them unlinkable against real base-ABI objects. Omitting the tag
+              leaves it at its default. }
+            if target_info.abi=abi_eabihf then
+              current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_VFP_args,1));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_FP_denormal,1));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_FP_exceptions,1));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_FP_number_model,3));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_align_needed,0));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_align8_preserved,1));
+            { gcc typically writes more like enum size, wchar size, optimization goal, however, this
+              is normally not module global in FPC }
+          end;
+      end;
+
+    { For targets with tf_init_final_units_by_calls set, generates two code
+      sequences, FPC_INIT_FUNC_TABLE and FPC_FINALIZE_FUNC_TABLE, each
+      consisting of an entry sequence, one BL per non-empty init/finalize
+      routine name found in entries, and an exit sequence. Both are appended to
+      the al_procedures assembler list. The inherited implementation is called
+      in every case.
+      entries: list whose items are read as pinitfinalentry. }
+    class procedure tarmnodeutils.insert_init_final_table(entries:tfplist);
+
+      { appends the entry sequence to list: saves R14 on the stack, using PUSH
+        for Thumb code and STMFD with a pre-indexed stack pointer reference
+        otherwise }
+      procedure genentry(list : TAsmList);
+        var
+          ref: treference;
+        begin
+          if GenerateThumbCode then
+            list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]))
+          else
+            begin
+              reference_reset(ref,4,[]);
+              ref.index:=NR_STACK_POINTER_REG;
+              ref.addressmode:=AM_PREINDEXED;
+              list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,[RS_R14]),PF_FD));
+            end;
+        end;
+
+      { appends the exit sequence to list: loads R15 from the stack (POP for
+        Thumb code, LDMFD otherwise), i.e. the value stored by genentry ends up
+        in R15 }
+      procedure genexit(list : TAsmList);
+        var
+          ref: treference;
+        begin
+          if GenerateThumbCode then
+            list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R15]))
+          else
+            begin
+              reference_reset(ref,4,[]);
+              ref.index:=NR_STACK_POINTER_REG;
+              ref.addressmode:=AM_PREINDEXED;
+              list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,[RS_R15]),PF_FD));
+            end;
+        end;
+
+      var
+        initList, finalList, header: TAsmList;
+        entry : pinitfinalentry;
+        i : longint;
+      begin
+        { targets not initializing units by calls only get the inherited behaviour }
+        if not(tf_init_final_units_by_calls in target_info.flags) then
+          begin
+            inherited insert_init_final_table(entries);
+            exit;
+          end;
+        initList:=TAsmList.create;
+        finalList:=TAsmList.create;
+
+        genentry(finalList);
+        genentry(initList);
+
+        { one call per routine; entries with an empty name are skipped.
+          NOTE(review): finalize calls are emitted in the same order as the
+          init calls - confirm that this is the intended order }
+        for i:=0 to entries.count-1 do
+          begin
+            entry:=pinitfinalentry(entries[i]);
+            if entry^.finifunc<>'' then
+              finalList.Concat(taicpu.op_sym(A_BL,current_asmdata.RefAsmSymbol(entry^.finifunc,AT_FUNCTION)));
+            if entry^.initfunc<>'' then
+              initList.Concat(taicpu.op_sym(A_BL,current_asmdata.RefAsmSymbol(entry^.initfunc,AT_FUNCTION)));
+          end;
+
+        genexit(finalList);
+        genexit(initList);
+
+        { section start and global symbol for the init sequence; the header is
+          built in a separate list and merged via insertList
+          (presumably placing it in front of the code - TODO confirm) }
+        header:=TAsmList.create;
+        new_section(header, sec_code, 'FPC_INIT_FUNC_TABLE', 1);
+        header.concat(tai_symbol.Createname_global('FPC_INIT_FUNC_TABLE',AT_FUNCTION,0,voidcodepointertype));
+
+        initList.insertList(header);
+        header.free;
+
+        current_asmdata.AsmLists[al_procedures].concatList(initList);
+
+        { same for the finalize sequence }
+        header:=TAsmList.create;
+        new_section(header, sec_code, 'FPC_FINALIZE_FUNC_TABLE', 1);
+        header.concat(tai_symbol.Createname_global('FPC_FINALIZE_FUNC_TABLE',AT_FUNCTION,0,voidcodepointertype));
+
+        finalList.insertList(header);
+        header.free;
+
+        current_asmdata.AsmLists[al_procedures].concatList(finalList);
+
+        initList.Free;
+        finalList.Free;
+
+        { the inherited implementation is still invoked after the call
+          sequences have been generated }
+        inherited insert_init_final_table(entries);
+      end;
+
+  { unit initialization: points cnodeutils (declared outside this unit) at the
+    ARM implementation }
+  begin
+    cnodeutils:=tarmnodeutils;
+  end.
+

+ 17 - 12
compiler/arm/raarmgas.pas

@@ -65,10 +65,10 @@ Unit raarmgas;
       globtype,globals,verbose,
       globtype,globals,verbose,
       systems,aasmbase,aasmtai,aasmdata,aasmcpu,
       systems,aasmbase,aasmtai,aasmdata,aasmcpu,
       { symtable }
       { symtable }
-      symconst,symsym,
+      symconst,symsym,symdef,
       procinfo,
       procinfo,
       rabase,rautils,
       rabase,rautils,
-      cgbase,cgutils;
+      cgbase,cgutils,paramgr;
 
 
 
 
     function tarmunifiedattreader.is_unified: boolean;
     function tarmunifiedattreader.is_unified: boolean;
@@ -147,6 +147,7 @@ Unit raarmgas;
           end;
           end;
       end;
       end;
 
 
+
     function tarmattreader.is_targetdirective(const s: string): boolean;
     function tarmattreader.is_targetdirective(const s: string): boolean;
       begin
       begin
         case s of
         case s of
@@ -163,7 +164,7 @@ Unit raarmgas;
     procedure tarmattreader.ReadSym(oper : tarmoperand);
     procedure tarmattreader.ReadSym(oper : tarmoperand);
       var
       var
          tempstr, mangledname : string;
          tempstr, mangledname : string;
-         typesize,l,k : longint;
+         typesize,l,k : tcgint;
       begin
       begin
         tempstr:=actasmpattern;
         tempstr:=actasmpattern;
         Consume(AS_ID);
         Consume(AS_ID);
@@ -310,7 +311,7 @@ Unit raarmgas;
       procedure read_index(require_rbracket : boolean);
       procedure read_index(require_rbracket : boolean);
         var
         var
           recname : string;
           recname : string;
-          o_int,s_int : aint;
+          o_int,s_int : tcgint;
         begin
         begin
           case actasmtoken of
           case actasmtoken of
             AS_REGISTER :
             AS_REGISTER :
@@ -576,7 +577,7 @@ Unit raarmgas;
     Procedure tarmattreader.BuildOperand(oper : tarmoperand);
     Procedure tarmattreader.BuildOperand(oper : tarmoperand);
       var
       var
         expr : string;
         expr : string;
-        typesize,l : longint;
+        typesize,l : tcgint;
 
 
 
 
         procedure AddLabelOperand(hl:tasmlabel);
         procedure AddLabelOperand(hl:tasmlabel);
@@ -607,7 +608,7 @@ Unit raarmgas;
             hasdot  : boolean;
             hasdot  : boolean;
             l,
             l,
             toffset,
             toffset,
-            tsize   : longint;
+            tsize   : tcgint;
           begin
           begin
             if not(actasmtoken in [AS_DOT,AS_PLUS,AS_MINUS]) then
             if not(actasmtoken in [AS_DOT,AS_PLUS,AS_MINUS]) then
              exit;
              exit;
@@ -634,10 +635,8 @@ Unit raarmgas;
                   { don't allow direct access to fields of parameters, because that
                   { don't allow direct access to fields of parameters, because that
                     will generate buggy code. Allow it only for explicit typecasting }
                     will generate buggy code. Allow it only for explicit typecasting }
                   if hasdot and
                   if hasdot and
-                     (not oper.hastype) and
-                     (tabstractnormalvarsym(oper.opr.localsym).owner.symtabletype=parasymtable) and
-                     (current_procinfo.procdef.proccalloption<>pocall_register) then
-                    Message(asmr_e_cannot_access_field_directly_for_parameters);
+                     (not oper.hastype) then
+                     checklocalsubscript(oper.opr.localsym);
                   inc(oper.opr.localsymofs,l)
                   inc(oper.opr.localsymofs,l)
                 end;
                 end;
               OPR_CONSTANT :
               OPR_CONSTANT :
@@ -725,6 +724,8 @@ Unit raarmgas;
                         end;
                         end;
                     end;
                     end;
                 end;
                 end;
+              else
+               ;
             end;
             end;
           end;
           end;
 
 
@@ -803,7 +804,7 @@ Unit raarmgas;
           var
           var
             symtype: TAsmsymtype;
             symtype: TAsmsymtype;
             sym: string;
             sym: string;
-            val: aint;
+            val: tcgint;
           begin
           begin
             case actasmtoken of
             case actasmtoken of
               AS_INTNUM,
               AS_INTNUM,
@@ -818,6 +819,8 @@ Unit raarmgas;
                   oper.opr.ref.base:=NR_PC;
                   oper.opr.ref.base:=NR_PC;
                   oper.opr.ref.symbol:=GetConstLabel(sym,val);
                   oper.opr.ref.symbol:=GetConstLabel(sym,val);
                 end;
                 end;
+              else
+                ;
             end;
             end;
           end;
           end;
 
 
@@ -1144,6 +1147,8 @@ Unit raarmgas;
               else
               else
                 Message(asmr_e_invalid_operand_type); // Otherwise it would have been seen as a AS_REGISTER
                 Message(asmr_e_invalid_operand_type); // Otherwise it would have been seen as a AS_REGISTER
             end;
             end;
+          else
+            Message(asmr_e_invalid_operand_type);
         end;
         end;
       end;
       end;
 
 
@@ -1434,7 +1439,7 @@ Unit raarmgas;
       var
       var
         symname,
         symname,
         symval  : String;
         symval  : String;
-        val     : aint;
+        val     : tcgint;
         symtyp  : TAsmsymtype;
         symtyp  : TAsmsymtype;
       begin
       begin
         case actasmpattern of
         case actasmpattern of

+ 14 - 6
compiler/arm/rgcpu.pas

@@ -166,6 +166,8 @@ unit rgcpu;
                     if current_procinfo.framepointer<>r then
                     if current_procinfo.framepointer<>r then
                       add_edge(getsupreg(taicpu(p).oper[1]^.ref^.base),getsupreg(r));
                       add_edge(getsupreg(taicpu(p).oper[1]^.ref^.base),getsupreg(r));
                   end;
                   end;
+              else
+                ;
             end;
             end;
           end;
           end;
       end;
       end;
@@ -195,7 +197,7 @@ unit rgcpu;
           {$endif}
           {$endif}
           cg.a_load_const_reg(helplist,OS_ADDR,spilltemp.offset,hreg);
           cg.a_load_const_reg(helplist,OS_ADDR,spilltemp.offset,hreg);
           cg.a_op_reg_reg(helplist,OP_ADD,OS_ADDR,current_procinfo.framepointer,hreg);
           cg.a_op_reg_reg(helplist,OP_ADD,OS_ADDR,current_procinfo.framepointer,hreg);
-          reference_reset_base(tmpref,hreg,0,sizeof(aint),[]);
+          reference_reset_base(tmpref,hreg,0,spilltemp.temppos,sizeof(aint),[]);
         end
         end
       else if is_shifter_const(a and not($FFF), immshift) then
       else if is_shifter_const(a and not($FFF), immshift) then
         if spilltemp.offset > 0 then
         if spilltemp.offset > 0 then
@@ -205,7 +207,7 @@ unit rgcpu;
             {$endif}
             {$endif}
             helplist.concat(taicpu.op_reg_reg_const(A_ADD, hreg, current_procinfo.framepointer,
             helplist.concat(taicpu.op_reg_reg_const(A_ADD, hreg, current_procinfo.framepointer,
                                                       a and not($FFF)));
                                                       a and not($FFF)));
-            reference_reset_base(tmpref, hreg, a and $FFF, sizeof(aint),[]);
+            reference_reset_base(tmpref, hreg, a and $FFF, spilltemp.temppos, sizeof(aint),[]);
           end
           end
         else
         else
           begin
           begin
@@ -214,7 +216,7 @@ unit rgcpu;
             {$endif}
             {$endif}
             helplist.concat(taicpu.op_reg_reg_const(A_SUB, hreg, current_procinfo.framepointer,
             helplist.concat(taicpu.op_reg_reg_const(A_SUB, hreg, current_procinfo.framepointer,
                                                       a and not($FFF)));
                                                       a and not($FFF)));
-            reference_reset_base(tmpref, hreg, -(a and $FFF), sizeof(aint),[]);
+            reference_reset_base(tmpref, hreg, -(a and $FFF), spilltemp.temppos, sizeof(aint),[]);
           end
           end
       else
       else
         begin
         begin
@@ -222,7 +224,7 @@ unit rgcpu;
           helplist.concat(tai_comment.create(strpnew('Spilling: Use a_load_const_reg to fix spill offset')));
           helplist.concat(tai_comment.create(strpnew('Spilling: Use a_load_const_reg to fix spill offset')));
           {$endif}
           {$endif}
           cg.a_load_const_reg(helplist,OS_ADDR,spilltemp.offset,hreg);
           cg.a_load_const_reg(helplist,OS_ADDR,spilltemp.offset,hreg);
-          reference_reset_base(tmpref,current_procinfo.framepointer,0,sizeof(aint),[]);
+          reference_reset_base(tmpref,current_procinfo.framepointer,0,spilltemp.temppos,sizeof(aint),[]);
           tmpref.index:=hreg;
           tmpref.index:=hreg;
         end;
         end;
 
 
@@ -353,6 +355,8 @@ unit rgcpu;
                 RS_S21,RS_S23,RS_S25,RS_S27,RS_S29,RS_S31] do
                 RS_S21,RS_S23,RS_S25,RS_S27,RS_S29,RS_S31] do
                 add_edge(supreg,i);
                 add_edge(supreg,i);
             end;
             end;
+          else
+            ;
         end;
         end;
       end;
       end;
 
 
@@ -483,7 +487,7 @@ unit rgcpu;
             tmpref.base:=NR_R15;
             tmpref.base:=NR_R15;
             helplist.concat(taicpu.op_reg_ref(A_LDR,hreg,tmpref));
             helplist.concat(taicpu.op_reg_ref(A_LDR,hreg,tmpref));
 
 
-            reference_reset_base(tmpref,current_procinfo.framepointer,0,sizeof(aint),[]);
+            reference_reset_base(tmpref,current_procinfo.framepointer,0,ctempposinvalid,sizeof(aint),[]);
             tmpref.index:=hreg;
             tmpref.index:=hreg;
 
 
             if spilltemp.index<>NR_NO then
             if spilltemp.index<>NR_NO then
@@ -543,7 +547,7 @@ unit rgcpu;
             if spilltemp.index<>NR_NO then
             if spilltemp.index<>NR_NO then
               internalerror(200401263);
               internalerror(200401263);
 
 
-            reference_reset_base(tmpref,current_procinfo.framepointer,0,sizeof(pint),[]);
+            reference_reset_base(tmpref,current_procinfo.framepointer,0,ctempposinvalid,sizeof(pint),[]);
             tmpref.index:=hreg;
             tmpref.index:=hreg;
 
 
             helplist.concat(spilling_create_store(tempreg,tmpref));
             helplist.concat(spilling_create_store(tempreg,tmpref));
@@ -606,6 +610,8 @@ unit rgcpu;
                     if current_procinfo.framepointer<>r then
                     if current_procinfo.framepointer<>r then
                       add_edge(getsupreg(taicpu(p).oper[1]^.ref^.base),getsupreg(r));
                       add_edge(getsupreg(taicpu(p).oper[1]^.ref^.base),getsupreg(r));
                   end;
                   end;
+              else
+                ;
             end;
             end;
           end;
           end;
       end;
       end;
@@ -658,6 +664,8 @@ unit rgcpu;
                        add_edge(getsupreg(taicpu(p).oper[0]^.reg),i);
                        add_edge(getsupreg(taicpu(p).oper[0]^.reg),i);
                      end;
                      end;
                  end;
                  end;
+              else
+                ;
             end;
             end;
           end;
           end;
       end;
       end;

+ 2 - 2
compiler/arm/symcpu.pas

@@ -101,7 +101,7 @@ type
     { library symbol for AROS }
     { library symbol for AROS }
     libsym : tsym;
     libsym : tsym;
     libsymderef : tderef;
     libsymderef : tderef;
-    function getcopyas(newtyp: tdeftyp; copytyp: tproccopytyp): tstoreddef; override;
+    function getcopyas(newtyp: tdeftyp; copytyp: tproccopytyp; const paraprefix: string): tstoreddef; override;
     procedure buildderef; override;
     procedure buildderef; override;
     procedure deref; override;
     procedure deref; override;
   end;
   end;
@@ -208,7 +208,7 @@ implementation
     end;
     end;
 
 
 
 
-  function tcpuprocdef.getcopyas(newtyp: tdeftyp; copytyp: tproccopytyp): tstoreddef;
+  function tcpuprocdef.getcopyas(newtyp: tdeftyp; copytyp: tproccopytyp; const paraprefix: string): tstoreddef;
     begin
     begin
       result:=inherited;
       result:=inherited;
       if newtyp=procdef then
       if newtyp=procdef then

+ 121 - 0
compiler/armgen/armpara.pas

@@ -0,0 +1,121 @@
+{
+    Copyright (c) 2019 by Jonas Maebe
+
+    ARM and AArch64 common parameter helpers
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ ****************************************************************************
+}
+unit armpara;
+
+{$i fpcdefs.inc}
+
+interface
+
+uses
+  symtype,
+  paramgr;
+
+type
+  tarmgenparamanager = class(tparamanager)
+   protected
+    { Returns whether a def is a "homogeneous float array" at the machine level.
+      This means that in the memory layout, the def only consists of maximally
+      4 floating point values that appear consecutively in memory }
+    function is_hfa(p: tdef; out basedef: tdef) : boolean;
+   private
+    { Recursive worker for is_hfa: walks arrays, records and float defs
+      reachable from p, storing the single float def found in basedef and
+      adding the number of float elements found to elecount. Returns false
+      as soon as p cannot be part of a homogeneous float array. }
+    function is_hfa_internal(p: tdef; var basedef: tdef; var elecount: longint): boolean;
+  end;
+
+
+implementation
+
+  uses
+    symconst,symdef,symsym,symutil,defutil;
+
+
+  { Reports whether p is a homogeneous float array: a def whose memory
+    layout consists solely of 1..4 consecutive floating point values of one
+    and the same float def. basedef is always written: it starts out as nil
+    and receives whatever float def the recursive scan found, even when the
+    final answer is false. }
+  function tarmgenparamanager.is_hfa(p: tdef; out basedef: tdef): boolean;
+    var
+      floatcount: longint;
+    begin
+      result:=false;
+      { start the recursive scan with no base type and no elements seen }
+      basedef:=nil;
+      floatcount:=0;
+      if not is_hfa_internal(p,basedef,floatcount) then
+        exit;
+      { at least one and at most four float elements are allowed }
+      if (floatcount<=0) or (floatcount>4) then
+        exit;
+      { the floats must account for the complete size of p, i.e. there may
+        be no padding or other data between or around them }
+      result:=(p.size=basedef.size*floatcount);
+    end;
+
+
+  { Recursive helper for is_hfa.
+
+    p        : the def to inspect (array, float or record; anything else fails)
+    basedef  : in/out; nil until the first float def is encountered, after
+               which every further float def must be that very same def
+    elecount : in/out; running count of float elements found so far
+
+    Returns true if p is compatible with being (part of) a homogeneous float
+    array. Note that basedef and elecount may already have been modified
+    when false is returned, so their values are only meaningful on success. }
+  function tarmgenparamanager.is_hfa_internal(p: tdef; var basedef: tdef; var elecount: longint): boolean;
+    var
+      i: longint;
+      sym: tsym;
+      tmpelecount: longint;
+    begin
+      result:=false;
+      case p.typ of
+        arraydef:
+          begin
+            { arrays for which is_special_array() holds never qualify }
+            if is_special_array(p) then
+              exit;
+            { an array of empty records has no influence }
+            if tarraydef(p).elementdef.size=0 then
+              begin
+                result:=true;
+                exit
+              end;
+            { count the floats of one array element separately, so the
+              count can be scaled by the array length below }
+            tmpelecount:=0;
+            if not is_hfa_internal(tarraydef(p).elementdef,basedef,tmpelecount) then
+              exit;
+            { tmpelecount now contains the number of hfa elements in a
+              single array element (e.g. 2 if it's an array of a record
+              containing two singles) -> multiply by number of elements
+              in the array }
+            inc(elecount,tarraydef(p).elecount*tmpelecount);
+            { bail out early once the limit of 4 elements is exceeded }
+            if elecount>4 then
+              exit;
+            result:=true;
+          end;
+        floatdef:
+          begin
+            { the first float def seen becomes the base type; every later
+              one must be the identical def (compared by reference) }
+            if not assigned(basedef) then
+              basedef:=p
+            else if basedef<>p then
+              exit;
+            inc(elecount);
+            result:=true;
+          end;
+        recorddef:
+          begin
+            { every field accepted by is_normal_fieldvarsym() must itself be
+              hfa-compatible; other symbols in the record's symtable are
+              skipped. The 4-element limit is not checked in this branch. }
+            for i:=0 to tabstractrecorddef(p).symtable.symlist.count-1 do
+              begin
+                sym:=tsym(tabstractrecorddef(p).symtable.symlist[i]);
+                if not is_normal_fieldvarsym(sym) then
+                  continue;
+                if not is_hfa_internal(tfieldvarsym(sym).vardef,basedef,elecount) then
+                  exit
+              end;
+            result:=true;
+          end;
+        else
+          { any other kind of def can never be part of an hfa }
+          exit
+      end;
+    end;
+
+end.

+ 279 - 18
compiler/assemble.pas

@@ -156,9 +156,12 @@ interface
         function single2str(d : single) : string; virtual;
         function single2str(d : single) : string; virtual;
         function double2str(d : double) : string; virtual;
         function double2str(d : double) : string; virtual;
         function extended2str(e : extended) : string; virtual;
         function extended2str(e : extended) : string; virtual;
-        Function DoPipe:boolean;
+        Function DoPipe:boolean; virtual;
 
 
         function CreateNewAsmWriter: TExternalAssemblerOutputFile; virtual;
         function CreateNewAsmWriter: TExternalAssemblerOutputFile; virtual;
+
+        {# Return true if the external assembler should run again }
+        function RerunAssembler: boolean; virtual;
       public
       public
 
 
         {# Returns the complete path and executable name of the assembler
         {# Returns the complete path and executable name of the assembler
@@ -250,11 +253,19 @@ Implementation
 {$ifdef memdebug}
 {$ifdef memdebug}
       cclasses,
       cclasses,
 {$endif memdebug}
 {$endif memdebug}
-      script,fmodule,verbose,
+{$ifdef OMFOBJSUPPORT}
+      omfbase,
+      ogomf,
+{$endif OMFOBJSUPPORT}
+{$if defined(cpuextended) and defined(FPC_HAS_TYPE_EXTENDED)}
+{$else}
+{$ifdef FPC_SOFT_FPUX80}
+      sfpux80,
+{$endif FPC_SOFT_FPUX80}
+{$endif}
+      cscript,fmodule,verbose,
       cpuinfo,
       cpuinfo,
-      aasmcpu,
-      owar,owomflib
-      ;
+      aasmcpu;
 
 
     var
     var
       CAssembler : array[tasm] of TAssemblerClass;
       CAssembler : array[tasm] of TAssemblerClass;
@@ -410,6 +421,7 @@ Implementation
         s: ansistring;
         s: ansistring;
       begin
       begin
         MaybeAddLinePrefix;
         MaybeAddLinePrefix;
+        s:='';
         setlength(s,len);
         setlength(s,len);
         move(p^,s[1],len);
         move(p^,s[1],len);
         AsmWriteAnsiStringUnfiltered(decorator.LineFilter(s));
         AsmWriteAnsiStringUnfiltered(decorator.LineFilter(s));
@@ -730,9 +742,13 @@ Implementation
 
 
     Function TExternalAssembler.DoPipe:boolean;
     Function TExternalAssembler.DoPipe:boolean;
       begin
       begin
+{$ifdef hasunix}
         DoPipe:=(cs_asm_pipe in current_settings.globalswitches) and
         DoPipe:=(cs_asm_pipe in current_settings.globalswitches) and
                 (([cs_asm_extern,cs_asm_leave,cs_link_on_target] * current_settings.globalswitches) = []) and
                 (([cs_asm_extern,cs_asm_leave,cs_link_on_target] * current_settings.globalswitches) = []) and
                 ((asminfo^.id in [as_gas,as_ggas,as_darwin,as_powerpc_xcoff,as_clang,as_solaris_as]));
                 ((asminfo^.id in [as_gas,as_ggas,as_darwin,as_powerpc_xcoff,as_clang,as_solaris_as]));
+{$else hasunix}
+        DoPipe:=false;
+{$endif}
       end;
       end;
 
 
 
 
@@ -869,7 +885,7 @@ Implementation
 
 
     Function TExternalAssembler.DoAssemble:boolean;
     Function TExternalAssembler.DoAssemble:boolean;
       begin
       begin
-        DoAssemble:=true;
+        result:=true;
         if DoPipe then
         if DoPipe then
          exit;
          exit;
         if not(cs_asm_extern in current_settings.globalswitches) then
         if not(cs_asm_extern in current_settings.globalswitches) then
@@ -883,13 +899,13 @@ Implementation
            Message1(exec_i_assembling,name);
            Message1(exec_i_assembling,name);
          end;
          end;
 
 
-        if CallAssembler(FindAssembler,MakeCmdLine) then
-         writer.RemoveAsm
+        repeat
+          result:=CallAssembler(FindAssembler,MakeCmdLine)
+        until not(result) or not RerunAssembler;
+        if result then
+          writer.RemoveAsm
         else
         else
-         begin
-            DoAssemble:=false;
-            GenerateError;
-         end;
+          GenerateError;
       end;
       end;
 
 
 
 
@@ -967,6 +983,12 @@ Implementation
       end;
       end;
 
 
 
 
+    function TExternalAssembler.RerunAssembler: boolean;
+      begin
+        result:=false;
+      end;
+
+
     procedure TExternalAssembler.WriteSourceLine(hp: tailineinfo);
     procedure TExternalAssembler.WriteSourceLine(hp: tailineinfo);
       var
       var
         module : tmodule;
         module : tmodule;
@@ -1047,6 +1069,10 @@ Implementation
         ccomp: comp;
         ccomp: comp;
 {$if defined(cpuextended) and defined(FPC_HAS_TYPE_EXTENDED)}
 {$if defined(cpuextended) and defined(FPC_HAS_TYPE_EXTENDED)}
         eextended: extended;
         eextended: extended;
+{$else}
+{$ifdef FPC_SOFT_FPUX80}
+	eextended: floatx80;
+{$endif}
 {$endif cpuextended}
 {$endif cpuextended}
       begin
       begin
         if do_line then
         if do_line then
@@ -1060,6 +1086,20 @@ Implementation
               { can't write full 80 bit floating point constants yet on non-x86 }
               { can't write full 80 bit floating point constants yet on non-x86 }
               aitrealconst_s80bit:
               aitrealconst_s80bit:
                 writer.AsmWriteLn(asminfo^.comment+'value: '+extended2str(tai_realconst(hp).value.s80val));
                 writer.AsmWriteLn(asminfo^.comment+'value: '+extended2str(tai_realconst(hp).value.s80val));
+{$else}
+{$ifdef FPC_SOFT_FPUX80}
+{$push}{$warn 6018 off} { Unreachable code due to compile time evaluation }
+             aitrealconst_s80bit:
+               begin
+     	         if sizeof(tai_realconst(hp).value.s80val) = sizeof(double) then
+                   writer.AsmWriteLn(asminfo^.comment+'value: '+double2str(tai_realconst(hp).value.s80val))
+     	         else if sizeof(tai_realconst(hp).value.s80val) = sizeof(single) then
+                   writer.AsmWriteLn(asminfo^.comment+'value: '+single2str(tai_realconst(hp).value.s80val))
+                else
+     	         internalerror(2017091901);
+       	      end;
+{$pop}
+{$endif}
 {$endif cpuextended}
 {$endif cpuextended}
               aitrealconst_s64comp:
               aitrealconst_s64comp:
                 writer.AsmWriteLn(asminfo^.comment+'value: '+extended2str(tai_realconst(hp).value.s64compval));
                 writer.AsmWriteLn(asminfo^.comment+'value: '+extended2str(tai_realconst(hp).value.s64compval));
@@ -1089,6 +1129,21 @@ Implementation
               eextended:=extended(tai_realconst(hp).value.s80val);
               eextended:=extended(tai_realconst(hp).value.s80val);
               pdata:=@eextended;
               pdata:=@eextended;
             end;
             end;
+{$else}
+{$ifdef FPC_SOFT_FPUX80}
+{$push}{$warn 6018 off} { Unreachable code due to compile time evaluation }
+          aitrealconst_s80bit:
+            begin
+	      if sizeof(tai_realconst(hp).value.s80val) = sizeof(double) then
+                eextended:=float64_to_floatx80(float64(double(tai_realconst(hp).value.s80val)))
+	      else if sizeof(tai_realconst(hp).value.s80val) = sizeof(single) then
+	        eextended:=float32_to_floatx80(float32(single(tai_realconst(hp).value.s80val)))
+	      else
+	        internalerror(2017091901);
+              pdata:=@eextended;
+            end;
+{$pop}
+{$endif}
 {$endif cpuextended}
 {$endif cpuextended}
           aitrealconst_s64comp:
           aitrealconst_s64comp:
             begin
             begin
@@ -1503,6 +1558,7 @@ Implementation
         objsym,
         objsym,
         objsymend : TObjSymbol;
         objsymend : TObjSymbol;
         cpu: tcputype;
         cpu: tcputype;
+        eabi_section, TmpSection: TObjSection;
       begin
       begin
         while assigned(hp) do
         while assigned(hp) do
          begin
          begin
@@ -1560,9 +1616,11 @@ Implementation
                                     (objsym.objsection<>ObjData.CurrObjSec) then
                                     (objsym.objsection<>ObjData.CurrObjSec) then
                                    InternalError(200404124);
                                    InternalError(200404124);
                                end
                                end
+{$push} {$R-}{$Q-}
                              else
                              else
                                Tai_const(hp).value:=objsymend.address-objsym.address+Tai_const(hp).symofs;
                                Tai_const(hp).value:=objsymend.address-objsym.address+Tai_const(hp).symofs;
                            end;
                            end;
+{$pop}
                        end;
                        end;
                    end;
                    end;
                  ObjData.alloc(tai_const(hp).size);
                  ObjData.alloc(tai_const(hp).size);
@@ -1593,6 +1651,11 @@ Implementation
                              break;
                              break;
                            end;
                            end;
                      end;
                      end;
+{$ifdef OMFOBJSUPPORT}
+                   asd_omf_linnum_line:
+                     { ignore for now, but should be added}
+                     ;
+{$endif OMFOBJSUPPORT}
 {$ifdef ARM}
 {$ifdef ARM}
                    asd_thumb_func:
                    asd_thumb_func:
                      ObjData.ThumbFunc:=true;
                      ObjData.ThumbFunc:=true;
@@ -1600,13 +1663,20 @@ Implementation
                      { ai_directive(hp).name can be only 16 or 32, this is checked by the reader }
                      { ai_directive(hp).name can be only 16 or 32, this is checked by the reader }
                      ObjData.ThumbFunc:=tai_directive(hp).name='16';
                      ObjData.ThumbFunc:=tai_directive(hp).name='16';
 {$endif ARM}
 {$endif ARM}
+{$ifdef RISCV}
+                   asd_option:
+                     internalerror(2019031701);
+{$endif RISCV}
                    else
                    else
                      internalerror(2010011101);
                      internalerror(2010011101);
                  end;
                  end;
                end;
                end;
              ait_section:
              ait_section:
                begin
                begin
-                 ObjData.CreateSection(Tai_section(hp).sectype,Tai_section(hp).name^,Tai_section(hp).secorder);
+                 if Tai_section(hp).sectype=sec_user then
+                   ObjData.CreateSection(Tai_section(hp).sectype,Tai_section(hp).secflags,Tai_section(hp).secprogbits,Tai_section(hp).name^,Tai_section(hp).secorder)
+                 else
+                   ObjData.CreateSection(Tai_section(hp).sectype,Tai_section(hp).name^,Tai_section(hp).secorder);
                  Tai_section(hp).sec:=ObjData.CurrObjSec;
                  Tai_section(hp).sec:=ObjData.CurrObjSec;
                end;
                end;
              ait_symbol :
              ait_symbol :
@@ -1630,6 +1700,30 @@ Implementation
              ait_cutobject :
              ait_cutobject :
                if SmartAsm then
                if SmartAsm then
                 break;
                 break;
+             ait_eabi_attribute :
+               begin
+                 eabi_section:=ObjData.findsection('.ARM.attributes');
+                 if not(assigned(eabi_section)) then
+                   begin
+                     TmpSection:=ObjData.CurrObjSec;
+                     ObjData.CreateSection(sec_arm_attribute,[],SPB_ARM_ATTRIBUTES,'',secorder_default);
+                     eabi_section:=ObjData.CurrObjSec;
+                     ObjData.setsection(TmpSection);
+                   end;
+                 if eabi_section.Size=0 then
+                   eabi_section.alloc(16);
+                 eabi_section.alloc(LengthUleb128(tai_eabi_attribute(hp).tag));
+                 case tai_eabi_attribute(hp).eattr_typ of
+                   eattrtype_dword:
+                     eabi_section.alloc(LengthUleb128(tai_eabi_attribute(hp).value));
+                   eattrtype_ntbs:
+                     eabi_section.alloc(Length(tai_eabi_attribute(hp).valuestr^)+1);
+                   else
+                     Internalerror(2019100701);
+                 end;
+               end;
+             else
+               ;
            end;
            end;
            hp:=Tai(hp.next);
            hp:=Tai(hp.next);
          end;
          end;
@@ -1642,6 +1736,7 @@ Implementation
         objsym,
         objsym,
         objsymend : TObjSymbol;
         objsymend : TObjSymbol;
         cpu: tcputype;
         cpu: tcputype;
+        eabi_section: TObjSection;
       begin
       begin
         while assigned(hp) do
         while assigned(hp) do
          begin
          begin
@@ -1653,6 +1748,13 @@ Implementation
                      { here we must determine the fillsize which is used in pass2 }
                      { here we must determine the fillsize which is used in pass2 }
                      Tai_align_abstract(hp).fillsize:=align(ObjData.CurrObjSec.Size,Tai_align_abstract(hp).aligntype)-
                      Tai_align_abstract(hp).fillsize:=align(ObjData.CurrObjSec.Size,Tai_align_abstract(hp).aligntype)-
                        ObjData.CurrObjSec.Size;
                        ObjData.CurrObjSec.Size;
+
+                     { maximum number of bytes for alignment exceeded? }
+                     if (Tai_align_abstract(hp).aligntype<>Tai_align_abstract(hp).maxbytes) and
+                       (Tai_align_abstract(hp).fillsize>Tai_align_abstract(hp).maxbytes) then
+                       Tai_align_abstract(hp).fillsize:=align(ObjData.CurrObjSec.Size,Byte(Tai_align_abstract(hp).aligntype div 2))-
+                         ObjData.CurrObjSec.Size;
+
                      ObjData.alloc(Tai_align_abstract(hp).fillsize);
                      ObjData.alloc(Tai_align_abstract(hp).fillsize);
                    end;
                    end;
                end;
                end;
@@ -1689,15 +1791,25 @@ Implementation
                    begin
                    begin
                      objsym:=Objdata.SymbolRef(tai_const(hp).sym);
                      objsym:=Objdata.SymbolRef(tai_const(hp).sym);
                      objsymend:=Objdata.SymbolRef(tai_const(hp).endsym);
                      objsymend:=Objdata.SymbolRef(tai_const(hp).endsym);
-                     if objsymend.objsection<>objsym.objsection then
+                     if Tai_const(hp).consttype in [aitconst_gottpoff,aitconst_tlsgd,aitconst_tlsdesc] then
+                       begin
+                         if objsymend.objsection<>ObjData.CurrObjSec then
+                           Internalerror(2019092801);
+                         Tai_const(hp).value:=objsymend.address-ObjData.CurrObjSec.Size+Tai_const(hp).symofs;
+                       end
+                     else if objsymend.objsection<>objsym.objsection then
                        begin
                        begin
                          if (Tai_const(hp).consttype in [aitconst_uleb128bit,aitconst_sleb128bit]) or
                          if (Tai_const(hp).consttype in [aitconst_uleb128bit,aitconst_sleb128bit]) or
                             (objsym.objsection<>ObjData.CurrObjSec) then
                             (objsym.objsection<>ObjData.CurrObjSec) then
                            internalerror(200905042);
                            internalerror(200905042);
                        end
                        end
+{$push} {$R-}{$Q-}
                      else
                      else
                        Tai_const(hp).value:=objsymend.address-objsym.address+Tai_const(hp).symofs;
                        Tai_const(hp).value:=objsymend.address-objsym.address+Tai_const(hp).symofs;
                    end;
                    end;
+{$pop}
+                 if (Tai_const(hp).consttype in [aitconst_uleb128bit,aitconst_sleb128bit]) then
+                   Tai_const(hp).fixsize;
                  ObjData.alloc(tai_const(hp).size);
                  ObjData.alloc(tai_const(hp).size);
                end;
                end;
              ait_section:
              ait_section:
@@ -1746,6 +1858,14 @@ Implementation
                    asd_code:
                    asd_code:
                      { ignore for now, but should be added}
                      { ignore for now, but should be added}
                      ;
                      ;
+                   asd_option:
+                     { ignore for now, but should be added}
+                     ;
+{$ifdef OMFOBJSUPPORT}
+                   asd_omf_linnum_line:
+                     { ignore for now, but should be added}
+                     ;
+{$endif OMFOBJSUPPORT}
                    asd_cpu:
                    asd_cpu:
                      begin
                      begin
                        ObjData.CPUType:=cpu_none;
                        ObjData.CPUType:=cpu_none;
@@ -1760,6 +1880,25 @@ Implementation
                      internalerror(2010011102);
                      internalerror(2010011102);
                  end;
                  end;
                end;
                end;
+             ait_eabi_attribute :
+               begin
+                 eabi_section:=ObjData.findsection('.ARM.attributes');
+                 if not(assigned(eabi_section)) then
+                   Internalerror(2019100702);
+                 if eabi_section.Size=0 then
+                   eabi_section.alloc(16);
+                 eabi_section.alloc(LengthUleb128(tai_eabi_attribute(hp).tag));
+                 case tai_eabi_attribute(hp).eattr_typ of
+                   eattrtype_dword:
+                     eabi_section.alloc(LengthUleb128(tai_eabi_attribute(hp).value));
+                   eattrtype_ntbs:
+                     eabi_section.alloc(Length(tai_eabi_attribute(hp).valuestr^)+1);
+                   else
+                     Internalerror(2019100703);
+                 end;
+               end;
+             else
+               ;
            end;
            end;
            hp:=Tai(hp.next);
            hp:=Tai(hp.next);
          end;
          end;
@@ -1782,10 +1921,18 @@ Implementation
         ddouble : double;
         ddouble : double;
         {$if defined(cpuextended) and defined(FPC_HAS_TYPE_EXTENDED)}
         {$if defined(cpuextended) and defined(FPC_HAS_TYPE_EXTENDED)}
         eextended : extended;
         eextended : extended;
+	{$else}
+        {$ifdef FPC_SOFT_FPUX80}
+	eextended : floatx80;
+        {$endif}
         {$endif}
         {$endif}
         ccomp : comp;
         ccomp : comp;
         tmp    : word;
         tmp    : word;
         cpu: tcputype;
         cpu: tcputype;
+        ddword : dword;
+        eabi_section: TObjSection;
+        s: String;
+        TmpDataPos: TObjSectionOfs;
       begin
       begin
         fillchar(zerobuf,sizeof(zerobuf),0);
         fillchar(zerobuf,sizeof(zerobuf),0);
         fillchar(objsym,sizeof(objsym),0);
         fillchar(objsym,sizeof(objsym),0);
@@ -1852,6 +1999,21 @@ Implementation
                        eextended:=extended(tai_realconst(hp).value.s80val);
                        eextended:=extended(tai_realconst(hp).value.s80val);
                        pdata:=@eextended;
                        pdata:=@eextended;
                      end;
                      end;
+         {$else}
+         {$ifdef FPC_SOFT_FPUX80}
+           {$push}{$warn 6018 off} { Unreachable code due to compile time evaluation }
+                   aitrealconst_s80bit:
+                     begin
+		       if sizeof(tai_realconst(hp).value.s80val) = sizeof(double) then
+                         eextended:=float64_to_floatx80(float64(double(tai_realconst(hp).value.s80val)))
+		       else if sizeof(tai_realconst(hp).value.s80val) = sizeof(single) then
+			 eextended:=float32_to_floatx80(float32(single(tai_realconst(hp).value.s80val)))
+		       else
+			 internalerror(2017091901);
+                       pdata:=@eextended;
+                     end;
+           {$pop}
+	 {$endif}
          {$endif cpuextended}
          {$endif cpuextended}
                    aitrealconst_s64comp:
                    aitrealconst_s64comp:
                      begin
                      begin
@@ -1877,8 +2039,29 @@ Implementation
                      objsym:=Objdata.SymbolRef(tai_const(hp).sym);
                      objsym:=Objdata.SymbolRef(tai_const(hp).sym);
                      objsymend:=Objdata.SymbolRef(tai_const(hp).endsym);
                      objsymend:=Objdata.SymbolRef(tai_const(hp).endsym);
                      relative_reloc:=(objsym.objsection<>objsymend.objsection);
                      relative_reloc:=(objsym.objsection<>objsymend.objsection);
-                     Tai_const(hp).value:=objsymend.address-objsym.address+Tai_const(hp).symofs;
+                     if Tai_const(hp).consttype in [aitconst_gottpoff] then
+                       begin
+                         if objsymend.objsection<>ObjData.CurrObjSec then
+                           Internalerror(2019092802);
+                         Tai_const(hp).value:=objsymend.address-ObjData.CurrObjSec.Size+Tai_const(hp).symofs;
+                       end
+                     else if Tai_const(hp).consttype in [aitconst_tlsgd,aitconst_tlsdesc] then
+                       begin
+                         if objsymend.objsection<>ObjData.CurrObjSec then
+                           Internalerror(2019092802);
+                         Tai_const(hp).value:=ObjData.CurrObjSec.Size-objsymend.address+Tai_const(hp).symofs;
+                       end
+                     else if objsymend.objsection<>objsym.objsection then
+                       begin
+                         if (Tai_const(hp).consttype in [aitconst_uleb128bit,aitconst_sleb128bit]) or
+                            (objsym.objsection<>ObjData.CurrObjSec) then
+                           internalerror(2019010301);
+                       end
+                     else
+{$push} {$R-}{$Q-}
+                       Tai_const(hp).value:=objsymend.address-objsym.address+Tai_const(hp).symofs;
                    end;
                    end;
+{$pop}
                  case tai_const(hp).consttype of
                  case tai_const(hp).consttype of
                    aitconst_64bit,
                    aitconst_64bit,
                    aitconst_32bit,
                    aitconst_32bit,
@@ -1931,17 +2114,35 @@ Implementation
 {$ifdef arm}
 {$ifdef arm}
                    aitconst_got:
                    aitconst_got:
                      ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_GOT32);
                      ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_GOT32);
+{                   aitconst_gottpoff:
+                     ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_TPOFF); }
+                   aitconst_tpoff:
+                     ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_TPOFF);
+                   aitconst_tlsgd:
+                     ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_TLSGD);
+                   aitconst_tlsdesc:
+                     begin
+                       { must be a relative symbol, thus value being valid }
+                       if not(assigned(tai_const(hp).sym)) or not(assigned(tai_const(hp).endsym)) then
+                         Internalerror(2019092904);
+                       ObjData.writereloc(Tai_const(hp).value,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_TLSDESC);
+                     end;
 {$endif arm}
 {$endif arm}
+                   aitconst_dtpoff:
+                     { so far, the size of dtpoff is fixed to 4 bytes }
+                     ObjData.writereloc(Tai_const(hp).symofs,4,Objdata.SymbolRef(tai_const(hp).sym),RELOC_DTPOFF);
                    aitconst_gotoff_symbol:
                    aitconst_gotoff_symbol:
                      ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_GOTOFF);
                      ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_GOTOFF);
                    aitconst_uleb128bit,
                    aitconst_uleb128bit,
                    aitconst_sleb128bit :
                    aitconst_sleb128bit :
                      begin
                      begin
+                       if Tai_const(hp).fixed_size=0 then
+                         Internalerror(2019030302);
                        if tai_const(hp).consttype=aitconst_uleb128bit then
                        if tai_const(hp).consttype=aitconst_uleb128bit then
-                         leblen:=EncodeUleb128(qword(Tai_const(hp).value),lebbuf)
+                         leblen:=EncodeUleb128(qword(Tai_const(hp).value),lebbuf,Tai_const(hp).fixed_size)
                        else
                        else
-                         leblen:=EncodeSleb128(Tai_const(hp).value,lebbuf);
-                       if leblen<>tai_const(hp).size then
+                         leblen:=EncodeSleb128(Tai_const(hp).value,lebbuf,Tai_const(hp).fixed_size);
+                       if leblen<>tai_const(hp).fixed_size then
                          internalerror(200709271);
                          internalerror(200709271);
                        ObjData.writebytes(lebbuf,leblen);
                        ObjData.writebytes(lebbuf,leblen);
                      end;
                      end;
@@ -1997,6 +2198,18 @@ Implementation
                              break;
                              break;
                            end;
                            end;
                      end;
                      end;
+{$ifdef OMFOBJSUPPORT}
+                   asd_omf_linnum_line:
+                     begin
+                       TOmfObjSection(ObjData.CurrObjSec).LinNumEntries.Add(
+                         TOmfSubRecord_LINNUM_MsLink_Entry.Create(
+                           strtoint(tai_directive(hp).name),
+                           ObjData.CurrObjSec.Size
+                         ));
+                     end;
+{$endif OMFOBJSUPPORT}
+                   else
+                     ;
                  end
                  end
                end;
                end;
              ait_symbolpair:
              ait_symbolpair:
@@ -2017,6 +2230,54 @@ Implementation
              ait_seh_directive :
              ait_seh_directive :
                tai_seh_directive(hp).generate_code(objdata);
                tai_seh_directive(hp).generate_code(objdata);
 {$endif DISABLE_WIN64_SEH}
 {$endif DISABLE_WIN64_SEH}
+             ait_eabi_attribute :
+               begin
+                 eabi_section:=ObjData.findsection('.ARM.attributes');
+                 if not(assigned(eabi_section)) then
+                   Internalerror(2019100704);
+                 if eabi_section.Size=0 then
+                   begin
+                     s:='A';
+                     eabi_section.write(s[1],1);
+                     ddword:=eabi_section.Size-1;
+                     eabi_section.write(ddword,4);
+                     s:='aeabi'#0;
+                     eabi_section.write(s[1],6);
+                     s:=#1;
+                     eabi_section.write(s[1],1);
+                     ddword:=eabi_section.Size-1-4-6-1;
+                     eabi_section.write(ddword,4);
+                   end;
+                 leblen:=EncodeUleb128(tai_eabi_attribute(hp).tag,lebbuf,0);
+                 eabi_section.write(lebbuf,leblen);
+
+                 case tai_eabi_attribute(hp).eattr_typ of
+                   eattrtype_dword:
+                     begin
+                       leblen:=EncodeUleb128(tai_eabi_attribute(hp).value,lebbuf,0);
+                       eabi_section.write(lebbuf,leblen);
+                     end;
+                   eattrtype_ntbs:
+                     begin
+                       s:=tai_eabi_attribute(hp).valuestr^+#0;
+                       eabi_section.write(s[1],Length(s));
+                     end
+                   else
+                     Internalerror(2019100705);
+                 end;
+                 { update size of attributes section, write directly to the dyn. arrays as
+                   we do not increase the size of section }
+                 TmpDataPos:=eabi_section.Data.Pos;
+                 eabi_section.Data.seek(1);
+                 ddword:=eabi_section.Size-1;
+                 eabi_section.Data.write(ddword,4);
+                 eabi_section.Data.seek(12);
+                 ddword:=eabi_section.Size-1-4-6;
+                 eabi_section.Data.write(ddword,4);
+                 eabi_section.Data.Seek(TmpDataPos);
+               end;
+             else
+               ;
            end;
            end;
            hp:=Tai(hp.next);
            hp:=Tai(hp.next);
          end;
          end;

+ 115 - 24
compiler/avr/aasmcpu.pas

@@ -108,7 +108,7 @@ uses
 
 
     { replaces cond. branches by rjmp/jmp and the inverse cond. branch if needed
     { replaces cond. branches by rjmp/jmp and the inverse cond. branch if needed
       and transforms special instructions to valid instruction encodings }
       and transforms special instructions to valid instruction encodings }
-    procedure finalizeavrcode(list : TAsmList);
+    function finalizeavrcode(list : TAsmList) : Boolean;
 
 
 implementation
 implementation
 
 
@@ -396,15 +396,42 @@ implementation
       end;
       end;
 
 
 
 
-    procedure finalizeavrcode(list : TAsmList);
+    function finalizeavrcode(list : TAsmList) : Boolean;
       var
       var
         CurrOffset : longint;
         CurrOffset : longint;
-        curtai : tai;
+        curtai, firstinstruction: tai;
         again : boolean;
         again : boolean;
         l : tasmlabel;
         l : tasmlabel;
         inasmblock : Boolean;
         inasmblock : Boolean;
+
+      procedure remove_instruction; { Rewrites firstinstruction into an
+        operand-less SLEEP and then walks to the end of list, unlinking and
+        freeing every following entry except labels and symbol-end markers,
+        which are kept. firstinstruction is nil when the procedure returns.
+        NOTE(review): the entry the caller is currently visiting can be one
+        of the freed ones - confirm the caller does not dereference it again. }
+        var
+          i: Integer;
+          hp: tai;
+        begin
+          taicpu(firstinstruction).opcode:=A_SLEEP; { the node is reused, not deleted }
+          for i:=0 to taicpu(firstinstruction).opercnt-1 do
+            taicpu(firstinstruction).freeop(i); { release each operand before the counts are reset }
+          taicpu(firstinstruction).opercnt:=0;
+          taicpu(firstinstruction).ops:=0;
+          firstinstruction:=tai(firstinstruction.Next); { from here on the variable is only the walk cursor }
+          while assigned(firstinstruction) do
+            begin
+              if firstinstruction.typ in [ait_symbol_end,ait_label] then
+                firstinstruction:=tai(firstinstruction.Next) { kept: just step over it }
+              else
+                begin
+                  hp:=tai(firstinstruction.Next); { fetch the successor before the node is freed }
+                  list.Remove(firstinstruction);
+                  firstinstruction.free;
+                  firstinstruction:=hp;
+                end;
+            end;
+        end;
+
       begin
       begin
         again:=true;
         again:=true;
+        Result:=true;
         while again do
         while again do
           begin
           begin
             again:=false;
             again:=false;
@@ -422,6 +449,8 @@ implementation
                       end;
                       end;
                     ait_align:
                     ait_align:
                       inc(CurrOffset,tai_align(curtai).aligntype);
                       inc(CurrOffset,tai_align(curtai).aligntype);
+                    ait_const:
+                      inc(CurrOffset,tai_const(curtai).size);
                     ait_symbolpair,
                     ait_symbolpair,
                     ait_marker:
                     ait_marker:
                       ;
                       ;
@@ -437,34 +466,96 @@ implementation
 
 
             curtai:=tai(list.first);
             curtai:=tai(list.first);
             inasmblock:=false;
             inasmblock:=false;
+            firstinstruction:=nil;
             while assigned(curtai) do
             while assigned(curtai) do
               begin
               begin
                 case curtai.typ of
                 case curtai.typ of
                   ait_instruction:
                   ait_instruction:
-                    case taicpu(curtai).opcode of
-                      A_BRxx:
-                        if (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset>64) or
-                          (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset<-63) then
+                    begin
+                      if not(assigned(firstinstruction)) then
+                        firstinstruction:=curtai;
+                      case taicpu(curtai).opcode of
+                        A_BRxx:
+                          if (taicpu(curtai).oper[0]^.typ=top_ref) and ((taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset>64) or
+                            (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset<-63)) then
+                            begin
+                              if inasmblock then
+                                Message(asmw_e_brxx_out_of_range)
+                              else
+                                begin
+                                  current_asmdata.getjumplabel(l);
+                                  list.insertafter(tai_label.create(l),curtai);
+                                  if CPUAVR_HAS_JMP_CALL in cpu_capabilities[current_settings.cputype] then
+                                    list.insertafter(taicpu.op_sym(A_JMP,taicpu(curtai).oper[0]^.ref^.symbol),curtai)
+                                  else
+                                    list.insertafter(taicpu.op_sym(A_RJMP,taicpu(curtai).oper[0]^.ref^.symbol),curtai);
+                                  taicpu(curtai).oper[0]^.ref^.symbol:=l;
+                                  taicpu(curtai).condition:=inverse_cond(taicpu(curtai).condition);
+                                  again:=true;
+                                end;
+                            end;
+                        A_JMP:
+                          { replace JMP by RJMP? ...
+                            ... but do not mess with asm block }
+                          if not(inasmblock) and (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset<=2048) and
+                          (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset>=-2047) and
+                          { jmps to function go outside the currently considered scope, so do not mess with them.
+                            Those are generated by the peephole optimizer from call/ret sequences }
+                          not(taicpu(curtai).oper[0]^.ref^.symbol.typ=AT_FUNCTION) then
                           begin
                           begin
-                            current_asmdata.getjumplabel(l);
-                            list.insertafter(tai_label.create(l),curtai);
-                            list.insertafter(taicpu.op_sym(A_JMP,taicpu(curtai).oper[0]^.ref^.symbol),curtai);
-                            taicpu(curtai).oper[0]^.ref^.symbol:=l;
-                            taicpu(curtai).condition:=inverse_cond(taicpu(curtai).condition);
+                            taicpu(curtai).opcode:=A_RJMP;
                             again:=true;
                             again:=true;
                           end;
                           end;
-                      A_JMP:
-                        { replace JMP by RJMP? ...
-                          ... but do not mess with asm block }
-                        if not(inasmblock) and (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset<=2048) and
-                        (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset>=-2047) and
-                        { jmps to function go outside the currently considered scope, so do not mess with them.
-                          Those are generated by the peephole optimizer from call/ret sequences }
-                        not(taicpu(curtai).oper[0]^.ref^.symbol.typ=AT_FUNCTION) then
-                        begin
-                          taicpu(curtai).opcode:=A_RJMP;
-                          again:=true;
-                        end;
+                        A_STS:
+                          begin
+                            if current_settings.cputype in [cpu_avrtiny, cpu_avr1] then
+                              with taicpu(curtai).oper[0]^ do
+                                if (ref^.base=NR_NO) and (ref^.index=NR_NO) and (ref^.symbol=nil) and (ref^.offset<$40) then
+                                  begin
+                                    taicpu(curtai).opcode:=A_OUT;
+                                    taicpu(curtai).loadconst(0,ref^.offset);
+                                  end
+                                else if current_settings.cputype=cpu_avr1 then
+                                  begin
+                                    remove_instruction;
+                                    result:=false;
+                                  end;
+                          end;
+                        A_LDS:
+                          begin
+                            if current_settings.cputype in [cpu_avrtiny, cpu_avr1] then
+                              with taicpu(curtai).oper[1]^ do
+                                if (ref^.base=NR_NO) and (ref^.index=NR_NO) and (ref^.symbol=nil) and (ref^.offset<$40) then
+                                  begin
+                                    taicpu(curtai).opcode:=A_IN;
+                                    taicpu(curtai).loadconst(1,ref^.offset)
+                                  end
+                                else if current_settings.cputype=cpu_avr1 then
+                                  begin
+                                    remove_instruction;
+                                    result:=false;
+                                  end;
+                          end;
+                        A_SBIW,
+                        A_MULS,
+                        A_ICALL,
+                        A_IJMP,
+                        A_STD,
+                        A_LD,
+                        A_LDD,
+                        A_ST,
+                        A_ROR,
+                        A_POP,
+                        A_PUSH:
+                          begin
+                            { certain cpu types do not support some instructions, so replace them }
+                            if current_settings.cputype=cpu_avr1 then
+                              begin
+                                remove_instruction;
+                                result:=false;
+                              end;
+                          end;
+                      end;
                     end;
                     end;
                   ait_marker:
                   ait_marker:
                     case tai_marker(curtai).Kind of
                     case tai_marker(curtai).Kind of

+ 25 - 16
compiler/avr/agavrgas.pas

@@ -75,6 +75,9 @@ unit agavrgas;
 
 
     Procedure TAVRInstrWriter.WriteInstruction(hp : tai);
     Procedure TAVRInstrWriter.WriteInstruction(hp : tai);
 
 
+      var
+        op: TAsmOp;
+
       function getreferencestring(var ref : treference) : string;
       function getreferencestring(var ref : treference) : string;
         var
         var
           s : string;
           s : string;
@@ -104,22 +107,26 @@ unit agavrgas;
                     NR_R30:
                     NR_R30:
                       s:=s+'Z';
                       s:=s+'Z';
                     else
                     else
-                      s:=gas_regname(base);
+                      s:=s+gas_regname(base);
                   end;
                   end;
                   if addressmode=AM_POSTINCREMENT then
                   if addressmode=AM_POSTINCREMENT then
-                    s:=s+'+';
-
-                  if offset>0 then
-                    s:=s+'+'+tostr(offset)
-                  else if offset<0 then
-                    s:=s+tostr(offset)
+                    s:=s+'+'
+                  else if addressmode = AM_UNCHANGED then
+                    begin
+                      if (offset>0) or ((offset=0) and (op in [A_LDD,A_STD])) then
+                        s:=s+'+'+tostr(offset)
+                      else if offset<0 then
+                        s:=s+tostr(offset);
+                    end;
                 end
                 end
               else if assigned(symbol) or (offset<>0) then
               else if assigned(symbol) or (offset<>0) then
                 begin
                 begin
                   if assigned(symbol) then
                   if assigned(symbol) then
                     s:=ReplaceForbiddenAsmSymbolChars(symbol.name);
                     s:=ReplaceForbiddenAsmSymbolChars(symbol.name);
 
 
-                  if offset<0 then
+                  if s='' then
+                    s:=tostr(offset)
+                  else if offset<0 then
                     s:=s+tostr(offset)
                     s:=s+tostr(offset)
                   else if offset>0 then
                   else if offset>0 then
                     s:=s+'+'+tostr(offset);
                     s:=s+'+'+tostr(offset);
@@ -135,7 +142,10 @@ unit agavrgas;
                     else
                     else
                       s:='('+s+')';
                       s:='('+s+')';
                   end;
                   end;
-                end;
+                end
+              { reference to address 0? }
+              else if not(assigned(symbol)) and (offset=0) then
+                s:='(0)';
             end;
             end;
           getreferencestring:=s;
           getreferencestring:=s;
         end;
         end;
@@ -157,9 +167,8 @@ unit agavrgas;
                 begin
                 begin
                   hs:=ReplaceForbiddenAsmSymbolChars(o.ref^.symbol.name);
                   hs:=ReplaceForbiddenAsmSymbolChars(o.ref^.symbol.name);
                   if o.ref^.offset>0 then
                   if o.ref^.offset>0 then
-                   hs:=hs+'+'+tostr(o.ref^.offset)
-                  else
-                   if o.ref^.offset<0 then
+                    hs:=hs+'+'+tostr(o.ref^.offset)
+                  else if o.ref^.offset<0 then
                     hs:=hs+tostr(o.ref^.offset);
                     hs:=hs+tostr(o.ref^.offset);
                   getopstr:=hs;
                   getopstr:=hs;
                 end
                 end
@@ -170,10 +179,10 @@ unit agavrgas;
           end;
           end;
         end;
         end;
 
 
-    var op: TAsmOp;
-        s: string;
-        i: byte;
-        sep: string[3];
+    var
+      s: string;
+      i: byte;
+      sep: string[3];
     begin
     begin
       op:=taicpu(hp).opcode;
       op:=taicpu(hp).opcode;
       s:=#9+gas_op2str[op]+cond2str[taicpu(hp).condition];
       s:=#9+gas_op2str[op]+cond2str[taicpu(hp).condition];

+ 281 - 152
compiler/avr/aoptcpu.pas

@@ -26,11 +26,11 @@ Unit aoptcpu;
 
 
 {$i fpcdefs.inc}
 {$i fpcdefs.inc}
 
 
-{$define DEBUG_AOPTCPU}
+{ $define DEBUG_AOPTCPU}
 
 
 Interface
 Interface
 
 
-uses cpubase, cgbase, aasmtai, aopt,AoptObj, aoptcpub;
+uses cpubase,cgbase,aasmtai,aopt,AoptObj,aoptcpub;
 
 
 Type
 Type
   TCpuAsmOptimizer = class(TAsmOptimizer)
   TCpuAsmOptimizer = class(TAsmOptimizer)
@@ -42,6 +42,8 @@ Type
     function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
     function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
     function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
     function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
 
 
+    function InvertSkipInstruction(var p: tai): boolean;
+
     { uses the same constructor as TAopObj }
     { uses the same constructor as TAopObj }
     function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
     function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
     procedure PeepHoleOptPass2;override;
     procedure PeepHoleOptPass2;override;
@@ -54,6 +56,7 @@ Implementation
     verbose,
     verbose,
     cpuinfo,
     cpuinfo,
     aasmbase,aasmcpu,aasmdata,
     aasmbase,aasmcpu,aasmdata,
+    aoptutils,
     globals,globtype,
     globals,globtype,
     cgutils;
     cgutils;
 
 
@@ -74,7 +77,9 @@ Implementation
         (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
         (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
         (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
         (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
         (r1.relsymbol = r2.relsymbol) and
         (r1.relsymbol = r2.relsymbol) and
-        (r1.addressmode = r2.addressmode);
+        (r1.addressmode = r2.addressmode) and
+        (r1.volatility=[]) and
+        (r2.volatility=[]);
     end;
     end;
 
 
 
 
@@ -126,13 +131,6 @@ Implementation
     end;
     end;
 
 
 
 
-  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
-    begin
-      Result:=(taicpu(instr).ops=2) and
-        (taicpu(instr).oper[0]^.typ=ot0) and
-        (taicpu(instr).oper[1]^.typ=ot1);
-    end;
-
 {$ifdef DEBUG_AOPTCPU}
 {$ifdef DEBUG_AOPTCPU}
   procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
   procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
     begin
     begin
@@ -150,6 +148,10 @@ Implementation
       If (p1.typ = ait_instruction) and (taicpu(p1).opcode in [A_MUL,A_MULS,A_FMUL,A_FMULS,A_FMULSU]) and
       If (p1.typ = ait_instruction) and (taicpu(p1).opcode in [A_MUL,A_MULS,A_FMUL,A_FMULS,A_FMULSU]) and
               ((getsupreg(reg)=RS_R0) or (getsupreg(reg)=RS_R1)) then
               ((getsupreg(reg)=RS_R0) or (getsupreg(reg)=RS_R1)) then
         Result:=true
         Result:=true
+      else if (p1.typ = ait_instruction) and (taicpu(p1).opcode=A_MOVW) and
+        ((TRegister(ord(taicpu(p1).oper[0]^.reg)+1)=reg) or (TRegister(ord(taicpu(p1).oper[1]^.reg)+1)=reg) or
+         (taicpu(p1).oper[0]^.reg=reg) or (taicpu(p1).oper[1]^.reg=reg)) then
+        Result:=true
       else
       else
         Result:=inherited RegInInstruction(Reg, p1);
         Result:=inherited RegInInstruction(Reg, p1);
     end;
     end;
@@ -206,32 +208,96 @@ Implementation
       if p.opcode in [A_MOV,A_LD,A_LDD,A_LDS,A_LPM,A_LDI,A_MOVW] then
       if p.opcode in [A_MOV,A_LD,A_LDD,A_LDS,A_LPM,A_LDI,A_MOVW] then
         i:=1;
         i:=1;
 
 
-      while(i<p.ops) do
+      while i<p.ops do
         begin
         begin
-          case p.oper[I]^.typ of
+          case p.oper[i]^.typ of
             top_reg:
             top_reg:
-              Result := (p.oper[I]^.reg = reg) or
+              Result := (p.oper[i]^.reg = reg) or
                 { MOVW }
                 { MOVW }
                 ((i=1) and (p.opcode=A_MOVW) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
                 ((i=1) and (p.opcode=A_MOVW) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
             top_ref:
             top_ref:
               Result :=
               Result :=
-                (p.oper[I]^.ref^.base = reg) or
-                (p.oper[I]^.ref^.index = reg);
+                (p.oper[i]^.ref^.base = reg) or
+                (p.oper[i]^.ref^.index = reg);
           end;
           end;
           { Bailout if we found something }
           { Bailout if we found something }
           if Result then
           if Result then
             exit;
             exit;
-          Inc(I);
+          Inc(i);
+        end;
+    end;
+
+
+  {
+    Turns
+      sbis ?
+      jmp .Lx
+      op
+    .Lx:
+
+    Into
+      sbic ?
+      op
+
+    Intended for all types of skip instructions; the opcode inversion below
+    covers SBIS/SBIC and SBRS/SBRC. Any other opcode in p is left as it is,
+    but the jump is still removed.
+
+    p is the skip instruction. The rewrite is done only when p is followed by
+    an RJMP/JMP whose single operand is a reference with offset 0 to a
+    TAsmLabel, the next instruction after that jump is not itself a jump, and
+    FindLabel accepts the jump's target label at the entry that follows this
+    instruction. The jump is then removed from asml and freed, and the
+    reference count of its target label is decremented.
+
+    NOTE(review): result is only ever set to false, also when the rewrite was
+    done, and the local s is never used - confirm both are intended.
+  }
+  function TCpuAsmOptimizer.InvertSkipInstruction(var p: tai): boolean;
+
+    function GetNextInstructionWithoutLabel(p: tai; var next: tai): boolean; { Steps
+      forward with GetNextInstruction until it fails, next becomes nil, or
+      next is an ait_instruction; the result reports whether next ends up
+      pointing at an ait_instruction. }
+      begin
+        repeat
+          result:=GetNextInstruction(p,next);
+          p:=next;
+        until
+          (not result) or
+          (not assigned(next)) or
+          (next.typ in [ait_instruction]);
+
+        result:=assigned(next) and (next.typ in [ait_instruction]);
+      end;
+
+    var
+      hp1, hp2, hp3: tai;
+      s: string;
+    begin
+      result:=false;
+
+      { hp1 = the jump right after p, hp2 = the instruction being skipped over,
+        hp3 = the entry after hp2 where the jump target is looked up }
+      if GetNextInstruction(taicpu(p),hp1) and
+        (hp1.typ=ait_instruction) and
+        (taicpu(hp1).opcode in [A_RJMP,A_JMP]) and
+        (taicpu(hp1).ops=1) and
+        (taicpu(hp1).oper[0]^.typ=top_ref) and
+        (taicpu(hp1).oper[0]^.ref^.offset=0) and
+        (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
+        GetNextInstructionWithoutLabel(hp1,hp2) and
+        (hp2.typ=ait_instruction) and
+        (not taicpu(hp2).is_jmp) and
+        GetNextInstruction(hp2,hp3) and
+        FindLabel(TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol),hp3) then
+        begin
+          DebugMsg('SkipJump2InvertedSkip', p);
+
+          { swap each skip opcode with its opposite; opcodes not listed stay unchanged }
+          case taicpu(p).opcode of
+            A_SBIS: taicpu(p).opcode:=A_SBIC;
+            A_SBIC: taicpu(p).opcode:=A_SBIS;
+            A_SBRS: taicpu(p).opcode:=A_SBRC;
+            A_SBRC: taicpu(p).opcode:=A_SBRS;
+          end;
+
+          TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol).decrefs; { the jump removed below no longer references the label }
+
+          asml.remove(hp1);
+          hp1.free;
         end;
         end;
     end;
     end;
 
 
+
   function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
   function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
     var
     var
       hp1,hp2,hp3,hp4,hp5: tai;
       hp1,hp2,hp3,hp4,hp5: tai;
       alloc, dealloc: tai_regalloc;
       alloc, dealloc: tai_regalloc;
       i: integer;
       i: integer;
       l: TAsmLabel;
       l: TAsmLabel;
-      TmpUsedRegs : TAllUsedRegs;
     begin
     begin
       result := false;
       result := false;
       case p.typ of
       case p.typ of
@@ -252,12 +318,12 @@ Implementation
               GetNextInstruction(p, hp1) and
               GetNextInstruction(p, hp1) and
               ((MatchInstruction(hp1, A_CP) and
               ((MatchInstruction(hp1, A_CP) and
                 (((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
                 (((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
-                  (taicpu(hp1).oper[1]^.reg = NR_R1)) or
+                  (taicpu(hp1).oper[1]^.reg = GetDefaultZeroReg)) or
                  ((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
                  ((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
-                  (taicpu(hp1).oper[0]^.reg = NR_R1) and
+                  (taicpu(hp1).oper[0]^.reg = GetDefaultZeroReg) and
                   (taicpu(p).opcode in [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_EOR,
                   (taicpu(p).opcode in [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_EOR,
                                         A_LSL,A_LSR,
                                         A_LSL,A_LSR,
-                                        A_OR,A_ORI,A_ROL,A_ROR])))) or
+                                        A_OR,A_ORI,A_ROL,A_ROR,A_SUB,A_SBI])))) or
                (MatchInstruction(hp1, A_CPI) and
                (MatchInstruction(hp1, A_CPI) and
                 (taicpu(p).opcode = A_ANDI) and
                 (taicpu(p).opcode = A_ANDI) and
                 (taicpu(p).oper[1]^.typ=top_const) and
                 (taicpu(p).oper[1]^.typ=top_const) and
@@ -272,7 +338,9 @@ Implementation
                 EQ = Z=1; NE = Z=0;
                 EQ = Z=1; NE = Z=0;
                 MI = N=1; PL = N=0; }
                 MI = N=1; PL = N=0; }
               MatchInstruction(hp2, A_BRxx) and
               MatchInstruction(hp2, A_BRxx) and
-              (taicpu(hp2).condition in [C_EQ,C_NE,C_MI,C_PL]) { and
+              ((taicpu(hp2).condition in [C_EQ,C_NE,C_MI,C_PL]) or
+              { sub/sbc set all flags }
+               (taicpu(p).opcode in [A_SUB,A_SBI])){ and
               no flag allocation tracking implemented yet on avr
               no flag allocation tracking implemented yet on avr
               assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next)))} then
               assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next)))} then
               begin
               begin
@@ -288,7 +356,9 @@ Implementation
                 }
                 }
 
 
                 // If we compare to the same value we are masking then invert the comparison
                 // If we compare to the same value we are masking then invert the comparison
-                if (taicpu(hp1).opcode=A_CPI) then
+                if (taicpu(hp1).opcode=A_CPI) or
+                  { sub/sbc with reverted? }
+                  ((taicpu(hp1).oper[0]^.reg = GetDefaultZeroReg) and (taicpu(p).opcode in [A_SUB,A_SBI])) then
                   taicpu(hp2).condition:=inverse_cond(taicpu(hp2).condition);
                   taicpu(hp2).condition:=inverse_cond(taicpu(hp2).condition);
 
 
                 asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
                 asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
@@ -307,20 +377,23 @@ Implementation
                   begin
                   begin
                     { turn
                     { turn
                       ldi reg0, imm
                       ldi reg0, imm
-                      cp/mov reg1, reg0
+                      <op> reg1, reg0
                       dealloc reg0
                       dealloc reg0
                       into
                       into
-                      cpi/ldi reg1, imm
+                      <op>i reg1, imm
                     }
                     }
-                    if MatchOpType(p,top_reg,top_const) and
+                    if MatchOpType(taicpu(p),top_reg,top_const) and
                        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
-                       MatchInstruction(hp1,[A_CP,A_MOV],2) and
+                       MatchInstruction(hp1,[A_CP,A_MOV,A_AND,A_SUB],2) and
                        (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
                        (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
-                       MatchOpType(hp1,top_reg,top_reg) and
+                       MatchOpType(taicpu(hp1),top_reg,top_reg) and
                        (getsupreg(taicpu(hp1).oper[0]^.reg) in [16..31]) and
                        (getsupreg(taicpu(hp1).oper[0]^.reg) in [16..31]) and
-                       (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) then
+                       (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
+                       not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) then
                       begin
                       begin
-                        CopyUsedRegs(TmpUsedRegs);
+                        TransferUsedRegs(TmpUsedRegs);
+                        UpdateUsedRegs(TmpUsedRegs,tai(p.next));
+                        UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
                         if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg, hp1, TmpUsedRegs)) then
                         if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg, hp1, TmpUsedRegs)) then
                           begin
                           begin
                             case taicpu(hp1).opcode of
                             case taicpu(hp1).opcode of
@@ -328,6 +401,10 @@ Implementation
                                 taicpu(hp1).opcode:=A_CPI;
                                 taicpu(hp1).opcode:=A_CPI;
                               A_MOV:
                               A_MOV:
                                 taicpu(hp1).opcode:=A_LDI;
                                 taicpu(hp1).opcode:=A_LDI;
+                              A_AND:
+                                taicpu(hp1).opcode:=A_ANDI;
+                              A_SUB:
+                                taicpu(hp1).opcode:=A_SUBI;
                               else
                               else
                                 internalerror(2016111901);
                                 internalerror(2016111901);
                             end;
                             end;
@@ -344,16 +421,10 @@ Implementation
                                 dealloc.Free;
                                 dealloc.Free;
                               end;
                               end;
 
 
-                            DebugMsg('Peephole LdiMov/Cp2Ldi/Cpi performed', p);
-
-                            GetNextInstruction(p,hp1);
-                            asml.Remove(p);
-                            p.Free;
-                            p:=hp1;
+                            DebugMsg('Peephole LdiOp2Opi performed', p);
 
 
-                            result:=true;
+                            RemoveCurrentP(p);
                           end;
                           end;
-                        ReleaseUsedRegs(TmpUsedRegs);
                       end;
                       end;
                   end;
                   end;
                 A_STS:
                 A_STS:
@@ -362,13 +433,20 @@ Implementation
                     (getsupreg(taicpu(p).oper[0]^.ref^.base)=RS_NO) and
                     (getsupreg(taicpu(p).oper[0]^.ref^.base)=RS_NO) and
                     (getsupreg(taicpu(p).oper[0]^.ref^.index)=RS_NO) and
                     (getsupreg(taicpu(p).oper[0]^.ref^.index)=RS_NO) and
                     (taicpu(p).oper[0]^.ref^.addressmode=AM_UNCHANGED) and
                     (taicpu(p).oper[0]^.ref^.addressmode=AM_UNCHANGED) and
-                    (taicpu(p).oper[0]^.ref^.offset>=32) and
-                    (taicpu(p).oper[0]^.ref^.offset<=95) then
+                    (((CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype]) and
+                      (taicpu(p).oper[0]^.ref^.offset>=0) and
+                      (taicpu(p).oper[0]^.ref^.offset<=63)) or
+                     (not(CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype]) and
+                      (taicpu(p).oper[0]^.ref^.offset>=32) and
+                      (taicpu(p).oper[0]^.ref^.offset<=95))) then
                     begin
                     begin
                       DebugMsg('Peephole Sts2Out performed', p);
                       DebugMsg('Peephole Sts2Out performed', p);
 
 
                       taicpu(p).opcode:=A_OUT;
                       taicpu(p).opcode:=A_OUT;
-                      taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset-32);
+                      if CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype] then
+                        taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset)
+                      else
+                        taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset-32);
                     end;
                     end;
                 A_LDS:
                 A_LDS:
                   if (taicpu(p).oper[1]^.ref^.symbol=nil) and
                   if (taicpu(p).oper[1]^.ref^.symbol=nil) and
@@ -376,13 +454,20 @@ Implementation
                     (getsupreg(taicpu(p).oper[1]^.ref^.base)=RS_NO) and
                     (getsupreg(taicpu(p).oper[1]^.ref^.base)=RS_NO) and
                     (getsupreg(taicpu(p).oper[1]^.ref^.index)=RS_NO) and
                     (getsupreg(taicpu(p).oper[1]^.ref^.index)=RS_NO) and
                     (taicpu(p).oper[1]^.ref^.addressmode=AM_UNCHANGED) and
                     (taicpu(p).oper[1]^.ref^.addressmode=AM_UNCHANGED) and
-                    (taicpu(p).oper[1]^.ref^.offset>=32) and
-                    (taicpu(p).oper[1]^.ref^.offset<=95) then
+                    (((CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype]) and
+                      (taicpu(p).oper[1]^.ref^.offset>=0) and
+                      (taicpu(p).oper[1]^.ref^.offset<=63)) or
+                     (not(CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype]) and
+                      (taicpu(p).oper[1]^.ref^.offset>=32) and
+                      (taicpu(p).oper[1]^.ref^.offset<=95))) then
                     begin
                     begin
                       DebugMsg('Peephole Lds2In performed', p);
                       DebugMsg('Peephole Lds2In performed', p);
 
 
                       taicpu(p).opcode:=A_IN;
                       taicpu(p).opcode:=A_IN;
-                      taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset-32);
+                      if CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype] then
+                        taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset)
+                      else
+                        taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset-32);
                     end;
                     end;
                 A_IN:
                 A_IN:
                     if GetNextInstruction(p,hp1) then
                     if GetNextInstruction(p,hp1) then
@@ -486,6 +571,46 @@ Implementation
                             result:=true;
                             result:=true;
                           end;
                           end;
                       end;
                       end;
+                A_SBRS,
+                A_SBRC:
+                  begin
+                    {
+                      Turn
+                        in rx, y
+                        sbr* rx, z
+                      Into
+                        sbi* y, z
+                    }
+                    if (taicpu(p).ops=2) and
+                       (taicpu(p).oper[0]^.typ=top_reg) and
+                       assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
+                       GetLastInstruction(p,hp1) and
+                       (hp1.typ=ait_instruction) and
+                       (taicpu(hp1).opcode=A_IN) and
+                       (taicpu(hp1).ops=2) and
+                       (taicpu(hp1).oper[1]^.typ=top_const) and
+                       (taicpu(hp1).oper[1]^.val in [0..31]) and
+                       MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^.reg) and
+                       (not RegModifiedBetween(taicpu(p).oper[0]^.reg, hp1, p)) then
+                      begin
+                        if taicpu(p).opcode=A_SBRS then
+                          taicpu(p).opcode:=A_SBIS
+                        else
+                          taicpu(p).opcode:=A_SBIC;
+
+                        taicpu(p).loadconst(0, taicpu(hp1).oper[1]^.val);
+
+                        DebugMsg('Peephole InSbrx2Sbix performed', p);
+
+                        asml.Remove(hp1);
+                        hp1.free;
+
+                        result:=true;
+                      end;
+
+                    if InvertSkipInstruction(p) then
+                      result:=true;
+                  end;
                 A_ANDI:
                 A_ANDI:
                   begin
                   begin
                     {
                     {
@@ -543,15 +668,32 @@ Implementation
                       begin
                       begin
                         DebugMsg('Redundant Andi removed', p);
                         DebugMsg('Redundant Andi removed', p);
 
 
-                        GetNextInstruction(p,hp1);
+                        result:=RemoveCurrentP(p);
+                      end;
+                  end;
+                A_ADD:
+                  begin
+                    if (taicpu(p).oper[1]^.reg=GetDefaultZeroReg) and
+                    GetNextInstruction(p, hp1) and
+                    MatchInstruction(hp1,A_ADC) then
+                    begin
+                      DebugMsg('Peephole AddAdc2Add performed', p);
 
 
-                        AsmL.Remove(p);
-                        p.free;
+                      result:=RemoveCurrentP(p);
+                    end;
+                  end;
+                A_SUB:
+                  begin
+                    if (taicpu(p).oper[1]^.reg=GetDefaultZeroReg) and
+                    GetNextInstruction(p, hp1) and
+                    MatchInstruction(hp1,A_SBC) then
+                    begin
+                      DebugMsg('Peephole SubSbc2Sub performed', p);
 
 
-                        p:=hp1;
+                      taicpu(hp1).opcode:=A_SUB;
 
 
-                        result:=true;
-                      end;
+                      result:=RemoveCurrentP(p);
+                    end;
                   end;
                   end;
                 A_CLR:
                 A_CLR:
                   begin
                   begin
@@ -573,10 +715,7 @@ Implementation
                       begin
                       begin
                         DebugMsg('Peephole ClrMov2Mov performed', p);
                         DebugMsg('Peephole ClrMov2Mov performed', p);
 
 
-                        asml.Remove(p);
-                        p.Free;
-                        p:=hp1;
-                        result:=true;
+                        result:=RemoveCurrentP(p);
                       end
                       end
                     { turn
                     { turn
                       clr rX
                       clr rX
@@ -600,7 +739,7 @@ Implementation
                       begin
                       begin
                         DebugMsg('Peephole ClrAdc2Adc performed', p);
                         DebugMsg('Peephole ClrAdc2Adc performed', p);
 
 
-                        taicpu(hp1).oper[1]^.reg:=NR_R1;
+                        taicpu(hp1).oper[1]^.reg:=GetDefaultZeroReg;
 
 
                         alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
                         alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
                         dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
                         dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
@@ -613,12 +752,7 @@ Implementation
                             dealloc.Free;
                             dealloc.Free;
                           end;
                           end;
 
 
-                        GetNextInstruction(p,hp1);
-                        asml.Remove(p);
-                        p.free;
-                        p:=hp1;
-
-                        result:=true;
+                        result:=RemoveCurrentP(p);
                       end;
                       end;
                   end;
                   end;
                 A_PUSH:
                 A_PUSH:
@@ -648,27 +782,22 @@ Implementation
                        GetNextInstruction(hp2,hp3) and
                        GetNextInstruction(hp2,hp3) and
                        MatchInstruction(hp3,A_POP) then
                        MatchInstruction(hp3,A_POP) then
                       begin
                       begin
-                       if (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
+                       if (CPUAVR_HAS_MOVW in cpu_capabilities[current_settings.cputype]) and
+                         (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
                          ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
                          ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
                          (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp3).oper[0]^.reg)+1) and
                          (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp3).oper[0]^.reg)+1) and
                          ((getsupreg(taicpu(hp3).oper[0]^.reg) mod 2)=0) then
                          ((getsupreg(taicpu(hp3).oper[0]^.reg) mod 2)=0) then
                          begin
                          begin
                            DebugMsg('Peephole PushPushPopPop2Movw performed', p);
                            DebugMsg('Peephole PushPushPopPop2Movw performed', p);
 
 
-                           taicpu(p).ops:=2;
-                           taicpu(p).opcode:=A_MOVW;
+                           taicpu(hp3).ops:=2;
+                           taicpu(hp3).opcode:=A_MOVW;
 
 
-                           taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
-                           taicpu(p).loadreg(0, taicpu(hp3).oper[0]^.reg);
+                           taicpu(hp3).loadreg(1, taicpu(p).oper[0]^.reg);
 
 
-                           asml.Remove(hp1);
-                           hp1.Free;
-                           asml.Remove(hp2);
-                           hp2.Free;
-                           asml.Remove(hp3);
-                           hp3.Free;
-
-                           result:=true;
+                           RemoveCurrentP(p);
+                           RemoveCurrentP(p);
+                           result:=RemoveCurrentP(p);
                          end
                          end
                        else
                        else
                          begin
                          begin
@@ -686,6 +815,17 @@ Implementation
                            taicpu(hp1).loadreg(1, taicpu(hp1).oper[0]^.reg);
                            taicpu(hp1).loadreg(1, taicpu(hp1).oper[0]^.reg);
                            taicpu(hp1).loadreg(0, taicpu(hp2).oper[0]^.reg);
                            taicpu(hp1).loadreg(0, taicpu(hp2).oper[0]^.reg);
 
 
+                           { life range of reg2 and reg3 is increased, fix register allocation entries }
+                           TransferUsedRegs(TmpUsedRegs);
+                           UpdateUsedRegs(TmpUsedRegs,tai(p.Next));
+                           AllocRegBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2,TmpUsedRegs);
+
+                           TransferUsedRegs(TmpUsedRegs);
+                           AllocRegBetween(taicpu(hp3).oper[0]^.reg,p,hp3,TmpUsedRegs);
+
+                           IncludeRegInUsedRegs(taicpu(hp3).oper[0]^.reg,UsedRegs);
+                           UpdateUsedRegs(tai(p.Next));
+
                            asml.Remove(hp2);
                            asml.Remove(hp2);
                            hp2.Free;
                            hp2.Free;
                            asml.Remove(hp3);
                            asml.Remove(hp3);
@@ -708,27 +848,60 @@ Implementation
                        asml.Remove(hp1);
                        asml.Remove(hp1);
                        hp1.Free;
                        hp1.Free;
 
 
+                       result:=true;
+                    end;
+                A_RCALL:
+                  if (cs_opt_level4 in current_settings.optimizerswitches) and
+                    GetNextInstruction(p,hp1) and
+                    MatchInstruction(hp1,A_RET) then
+                    begin
+                       DebugMsg('Peephole RCallReg2RJmp performed', p);
+
+                       taicpu(p).opcode:=A_RJMP;
+
+                       asml.Remove(hp1);
+                       hp1.Free;
+
                        result:=true;
                        result:=true;
                     end;
                     end;
                 A_MOV:
                 A_MOV:
                   begin
                   begin
+                    { change
+                      mov reg0, reg1
+                      dealloc reg0
+                      into
+                      dealloc reg0
+                    }
+                    if MatchOpType(taicpu(p),top_reg,top_reg) then
+                      begin
+                        TransferUsedRegs(TmpUsedRegs);
+                        UpdateUsedRegs(TmpUsedRegs,tai(p.Next));
+                        if not(RegInUsedRegs(taicpu(p).oper[0]^.reg,TmpUsedRegs)) and
+                          { reg. allocation information before calls is not perfect, so don't do this before
+                            calls/icalls }
+                          GetNextInstruction(p,hp1) and
+                          not(MatchInstruction(hp1,[A_CALL,A_RCALL])) then
+                          begin
+                            DebugMsg('Peephole Mov2Nop performed', p);
+                            result:=RemoveCurrentP(p);
+                            exit;
+                          end;
+                      end;
+
                     { turn
                     { turn
                       mov reg0, reg1
                       mov reg0, reg1
-                      push reg0
+                      <op> reg2,reg0
                       dealloc reg0
                       dealloc reg0
                       into
                       into
-                      push reg1
+                      <op> reg2,reg1
                     }
                     }
-                    if (taicpu(p).ops=2) and
-                       (taicpu(p).oper[0]^.typ = top_reg) and
-                       (taicpu(p).oper[1]^.typ = top_reg) and
+                    if MatchOpType(taicpu(p),top_reg,top_reg) and
                        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
                        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
-                       (hp1.typ = ait_instruction) and
-                       (taicpu(hp1).opcode in [A_PUSH,A_MOV,A_CP,A_CPC,A_ADD,A_SUB,A_ADC,A_SBC,A_EOR,A_AND,A_OR,
-                                               A_STD,A_ST,
-                                               A_OUT,A_IN]) and
-                       RegInInstruction(taicpu(p).oper[0]^.reg, hp1) and
+                       (MatchInstruction(hp1,[A_PUSH,A_MOV,A_CP,A_CPC,A_ADD,A_SUB,A_ADC,A_SBC,A_EOR,A_AND,A_OR,
+                                               A_OUT,A_IN]) or
+                       { the reference register of ST/STD cannot be replaced }
+                       (MatchInstruction(hp1,[A_STD,A_ST,A_STS]) and (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^)))) and
                        (not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1)) and
                        (not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1)) and
                        {(taicpu(hp1).ops=1) and
                        {(taicpu(hp1).ops=1) and
                        (taicpu(hp1).oper[0]^.typ = top_reg) and
                        (taicpu(hp1).oper[0]^.typ = top_reg) and
@@ -753,11 +926,12 @@ Implementation
                             dealloc.Free;
                             dealloc.Free;
                           end;
                           end;
 
 
-                        GetNextInstruction(p,hp1);
-                        asml.Remove(p);
-                        p.free;
-                        p:=hp1;
-                        result:=true;
+                        { life range of reg1 is increased }
+                        AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);
+                        { p will be removed, update used register as we continue
+                          with the next instruction after p }
+
+                        result:=RemoveCurrentP(p);
                       end
                       end
                     { remove
                     { remove
                       mov reg0,reg0
                       mov reg0,reg0
@@ -769,11 +943,7 @@ Implementation
                       begin
                       begin
                         DebugMsg('Peephole RedundantMov performed', p);
                         DebugMsg('Peephole RedundantMov performed', p);
 
 
-                        GetNextInstruction(p,hp1);
-                        asml.remove(p);
-                        p.free;
-                        p:=hp1;
-                        result:=true;
+                        result:=RemoveCurrentP(p);
                       end
                       end
                     {
                     {
                       Turn
                       Turn
@@ -784,22 +954,20 @@ Implementation
                         op ry,rz
                         op ry,rz
                     }
                     }
                     else if (taicpu(p).ops=2) and
                     else if (taicpu(p).ops=2) and
-                       (taicpu(p).oper[0]^.typ = top_reg) and
-                       (taicpu(p).oper[1]^.typ = top_reg) and
+                       MatchOpType(taicpu(p),top_reg,top_reg) and
                        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                        (hp1.typ=ait_instruction) and
                        (hp1.typ=ait_instruction) and
                        (taicpu(hp1).ops >= 1) and
                        (taicpu(hp1).ops >= 1) and
                        (taicpu(hp1).oper[0]^.typ = top_reg) and
                        (taicpu(hp1).oper[0]^.typ = top_reg) and
                        GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and
                        GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and
-                       (hp2.typ=ait_instruction) and
-                       (taicpu(hp2).opcode=A_MOV) and
-                       (taicpu(hp2).oper[0]^.typ = top_reg) and
-                       (taicpu(hp2).oper[1]^.typ = top_reg) and
+                       MatchInstruction(hp2,A_MOV) and
+                       MatchOpType(taicpu(hp2),top_reg,top_reg) and
                        (taicpu(hp2).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
                        (taicpu(hp2).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
                        (taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and
                        (taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and
                        (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
                        (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
                        (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp2)) and
                        (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp2)) and
                        (taicpu(hp1).opcode in [A_ADD,A_ADC,A_SUB,A_SBC,A_AND,A_OR,A_EOR,
                        (taicpu(hp1).opcode in [A_ADD,A_ADC,A_SUB,A_SBC,A_AND,A_OR,A_EOR,
+                                               A_INC,A_DEC,
                                                A_LSL,A_LSR,A_ASR,A_ROR,A_ROL]) and
                                                A_LSL,A_LSR,A_ASR,A_ROR,A_ROL]) and
                        assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg, tai(hp2.Next))) then
                        assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg, tai(hp2.Next))) then
                       begin
                       begin
@@ -823,16 +991,10 @@ Implementation
                             dealloc.Free;
                             dealloc.Free;
                           end;
                           end;
 
 
-                        GetNextInstruction(p,hp1);
-
-                        asml.remove(p);
-                        p.free;
                         asml.remove(hp2);
                         asml.remove(hp2);
                         hp2.free;
                         hp2.free;
 
 
-                        p:=hp1;
-
-                        result:=true;
+                        result:=RemoveCurrentP(p);
                       end
                       end
                     {
                     {
                       Turn
                       Turn
@@ -843,18 +1005,15 @@ Implementation
                         op rw,ry
                         op rw,ry
                     }
                     }
                     else if (taicpu(p).ops=2) and
                     else if (taicpu(p).ops=2) and
-                       (taicpu(p).oper[0]^.typ = top_reg) and
-                       (taicpu(p).oper[1]^.typ = top_reg) and
+                       MatchOpType(taicpu(p),top_reg,top_reg) and
                        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                        (hp1.typ=ait_instruction) and
                        (hp1.typ=ait_instruction) and
                        (taicpu(hp1).ops = 2) and
                        (taicpu(hp1).ops = 2) and
-                       (taicpu(hp1).oper[0]^.typ = top_reg) and
-                       (taicpu(hp1).oper[1]^.typ = top_reg) and
+                       MatchOpType(taicpu(hp1),top_reg,top_reg) and
                        GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and
                        GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and
                        (hp2.typ=ait_instruction) and
                        (hp2.typ=ait_instruction) and
                        (taicpu(hp2).opcode=A_MOV) and
                        (taicpu(hp2).opcode=A_MOV) and
-                       (taicpu(hp2).oper[0]^.typ = top_reg) and
-                       (taicpu(hp2).oper[1]^.typ = top_reg) and
+                       MatchOpType(taicpu(hp2),top_reg,top_reg) and
                        (taicpu(hp2).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
                        (taicpu(hp2).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
                        (taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and
                        (taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and
                        (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
                        (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
@@ -878,16 +1037,10 @@ Implementation
                             dealloc.Free;
                             dealloc.Free;
                           end;
                           end;
 
 
-                        GetNextInstruction(p,hp1);
+                        result:=RemoveCurrentP(p);
 
 
-                        asml.remove(p);
-                        p.free;
                         asml.remove(hp2);
                         asml.remove(hp2);
                         hp2.free;
                         hp2.free;
-
-                        p:=hp1;
-
-                        result:=true;
                       end
                       end
                     { fold
                     { fold
                       mov reg2,reg0
                       mov reg2,reg0
@@ -917,6 +1070,8 @@ Implementation
                           begin
                           begin
                             asml.Remove(alloc);
                             asml.Remove(alloc);
                             asml.InsertBefore(alloc,p);
                             asml.InsertBefore(alloc,p);
+                            { proper book keeping of currently used registers }
+                            IncludeRegInUsedRegs(taicpu(hp1).oper[0]^.reg,UsedRegs);
                           end;
                           end;
 
 
                         taicpu(p).opcode:=A_MOVW;
                         taicpu(p).opcode:=A_MOVW;
@@ -929,19 +1084,17 @@ Implementation
                       mov rX,...
                       mov rX,...
                       mov rX,...
                       mov rX,...
                     }
                     }
-                    else if (hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_MOV) then
-                      while (hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_MOV) and
+                    else if GetNextInstruction(p,hp1) and MatchInstruction(hp1,A_MOV) then
+                      while MatchInstruction(hp1,A_MOV) and
                             MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
                             MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
                             { don't remove the first mov if the second is a mov rX,rX }
                             { don't remove the first mov if the second is a mov rX,rX }
                             not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) do
                             not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) do
                         begin
                         begin
                           DebugMsg('Peephole MovMov2Mov performed', p);
                           DebugMsg('Peephole MovMov2Mov performed', p);
 
 
-                          asml.remove(p);
-                          p.free;
-                          p:=hp1;
+                          result:=RemoveCurrentP(p);
+
                           GetNextInstruction(hp1,hp1);
                           GetNextInstruction(hp1,hp1);
-                          result:=true;
                           if not assigned(hp1) then
                           if not assigned(hp1) then
                             break;
                             break;
                         end;
                         end;
@@ -961,33 +1114,8 @@ Implementation
                           op
                           op
                         .L1:
                         .L1:
                     }
                     }
-                    if GetNextInstruction(p, hp1) and
-                       (hp1.typ=ait_instruction) and
-                       (taicpu(hp1).opcode in [A_JMP,A_RJMP]) and
-                       (taicpu(hp1).ops>0) and
-                       (taicpu(hp1).oper[0]^.typ = top_ref) and
-                       (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
-                       GetNextInstruction(hp1, hp2) and
-                       (hp2.typ=ait_instruction) and
-                       (not taicpu(hp2).is_jmp) and
-                       GetNextInstruction(hp2, hp3) and
-                       (hp3.typ=ait_label) and
-                       (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) then
-                      begin
-                        DebugMsg('Peephole SbiJmp2Sbi performed',p);
-
-                        if taicpu(p).opcode=A_SBIC then
-                          taicpu(p).opcode:=A_SBIS
-                        else
-                          taicpu(p).opcode:=A_SBIC;
-
-                        tai_label(hp3).labsym.decrefs;
-
-                        AsmL.remove(hp1);
-                        taicpu(hp1).Free;
-
-                        result:=true;
-                      end
+                    if InvertSkipInstruction(p) then
+                      result:=true
                     {
                     {
                       Turn
                       Turn
                           sbiX X, y
                           sbiX X, y
@@ -1055,3 +1183,4 @@ Implementation
 begin
 begin
   casmoptimizer:=TCpuAsmOptimizer;
   casmoptimizer:=TCpuAsmOptimizer;
 End.
 End.
+

+ 0 - 4
compiler/avr/aoptcpub.pas

@@ -75,10 +75,6 @@ Const
 
 
   MaxCh = 2;
   MaxCh = 2;
 
 
-{ the maximum number of operands an instruction has }
-
-  MaxOps = 2;
-
 {Oper index of operand that contains the source (reference) with a load }
 {Oper index of operand that contains the source (reference) with a load }
 {instruction                                                            }
 {instruction                                                            }
 
 

+ 39 - 35
compiler/avr/avrreg.dat

@@ -2,43 +2,47 @@
 ; AVR registers
 ; AVR registers
 ;
 ;
 ; layout
 ; layout
-; <name>,<type>,<value>,<stdname>,<stab idx>,<dwarf idx>
+; <name>,<type>,<subreg>,<value>,<stdname>,<stab idx>,<dwarf idx>
 ;
 ;
-NO,$00,$00,INVALID,-1,-1
+NO,$00,$00,$00,INVALID,-1,-1
 
 
-R0,$01,$00,r0,0,0
-R1,$01,$01,r1,1,1
-R2,$01,$02,r2,2,2
-R3,$01,$03,r3,3,3
-R4,$01,$04,r4,4,4
-R5,$01,$05,r5,5,5
-R6,$01,$06,r6,6,6
-R7,$01,$07,r7,7,7
-R8,$01,$08,r8,8,8
-R9,$01,$09,r9,9,9
-R10,$01,$0a,r10,10,10
-R11,$01,$0b,r11,11,11
-R12,$01,$0c,r12,12,12
-R13,$01,$0d,r13,13,13
-R14,$01,$0e,r14,14,14
-R15,$01,$0f,r15,15,15
-R16,$01,$10,r16,16,16
-R17,$01,$11,r17,17,17
-R18,$01,$12,r18,18,18
-R19,$01,$13,r19,19,19
-R20,$01,$14,r20,20,20
-R21,$01,$15,r21,21,21
-R22,$01,$16,r22,22,22
-R23,$01,$17,r23,23,23
-R24,$01,$18,r24,24,24
-R25,$01,$19,r25,25,25
-R26,$01,$1a,r26,26,26
-R27,$01,$1b,r27,27,27
-R28,$01,$1c,r28,28,28
-R29,$01,$1d,r29,29,29
-R30,$01,$1e,r30,30,30
-R31,$01,$1f,r31,31,31
+R0,$01,$00,$00,r0,0,0
+R1,$01,$00,$01,r1,1,1
+R2,$01,$00,$02,r2,2,2
+R3,$01,$00,$03,r3,3,3
+R4,$01,$00,$04,r4,4,4
+R5,$01,$00,$05,r5,5,5
+R6,$01,$00,$06,r6,6,6
+R7,$01,$00,$07,r7,7,7
+R8,$01,$00,$08,r8,8,8
+R9,$01,$00,$09,r9,9,9
+R10,$01,$00,$0a,r10,10,10
+R11,$01,$00,$0b,r11,11,11
+R12,$01,$00,$0c,r12,12,12
+R13,$01,$00,$0d,r13,13,13
+R14,$01,$00,$0e,r14,14,14
+R15,$01,$00,$0f,r15,15,15
+R16,$01,$00,$10,r16,16,16
+R17,$01,$00,$11,r17,17,17
+R18,$01,$00,$12,r18,18,18
+R19,$01,$00,$13,r19,19,19
+R20,$01,$00,$14,r20,20,20
+R21,$01,$00,$15,r21,21,21
+R22,$01,$00,$16,r22,22,22
+R23,$01,$00,$17,r23,23,23
+R24,$01,$00,$18,r24,24,24
+R25,$01,$00,$19,r25,25,25
+R26,$01,$00,$1a,r26,26,26
+R27,$01,$00,$1b,r27,27,27
+R28,$01,$00,$1c,r28,28,28
+R29,$01,$00,$1d,r29,29,29
+R30,$01,$00,$1e,r30,30,30
+R31,$01,$00,$1f,r31,31,31
 
 
-SREG,$05,$00,sreg,0,0
+X,$01,$03,$1a,x,26,26
+Y,$01,$03,$1c,y,28,28
+Z,$01,$03,$1e,z,30,30
+
+SREG,$05,$00,$00,sreg,0,0
 
 
 
 

+ 22 - 0
compiler/avr/ccpuinnr.inc

@@ -0,0 +1,22 @@
+{
+
+    This file is part of the Free Pascal run time library.
+    Copyright (c) 2016 by the Free Pascal development team.
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+
+  in_avr_cli = in_cpu_first,
+  in_avr_sei = in_cpu_first+1,
+  in_avr_wdr = in_cpu_first+2,
+  in_avr_sleep = in_cpu_first+3,
+  in_avr_nop = in_cpu_first+4,
+  in_avr_save = in_cpu_first+5,
+  in_avr_restore = in_cpu_first+6
+

File diff suppressed because it is too large
+ 374 - 281
compiler/avr/cgcpu.pas


+ 48 - 53
compiler/avr/cpubase.pas

@@ -128,9 +128,6 @@ unit cpubase;
       VOLATILE_INTREGISTERS = [RS_R0,RS_R1,RS_R18..RS_R27,RS_R30,RS_R31];
       VOLATILE_INTREGISTERS = [RS_R0,RS_R1,RS_R18..RS_R27,RS_R30,RS_R31];
       VOLATILE_FPUREGISTERS = [];
       VOLATILE_FPUREGISTERS = [];
 
 
-    type
-      totherregisterset = set of tregisterindex;
-
 {*****************************************************************************
 {*****************************************************************************
                                 Conditions
                                 Conditions
 *****************************************************************************}
 *****************************************************************************}
@@ -158,7 +155,7 @@ unit cpubase;
 
 
     type
     type
       TResFlags = (F_NotPossible,F_CC,F_CS,F_EQ,F_GE,F_LO,F_LT,
       TResFlags = (F_NotPossible,F_CC,F_CS,F_EQ,F_GE,F_LO,F_LT,
-        F_NE,F_SH,F_VC,F_VS);
+        F_NE,F_SH,F_VC,F_VS,F_PL,F_MI);
 
 
 {*****************************************************************************
 {*****************************************************************************
                                 Operands
                                 Operands
@@ -171,7 +168,7 @@ unit cpubase;
 *****************************************************************************}
 *****************************************************************************}
 
 
     const
     const
-      max_operands = 4;
+      max_operands = 2;
 
 
       maxintregs = 15;
       maxintregs = 15;
       maxfpuregs = 0;
       maxfpuregs = 0;
@@ -232,8 +229,8 @@ unit cpubase;
 *****************************************************************************}
 *****************************************************************************}
 
 
       { Stack pointer register }
       { Stack pointer register }
-      NR_STACK_POINTER_REG = NR_R13;
-      RS_STACK_POINTER_REG = RS_R13;
+      NR_STACK_POINTER_REG = NR_INVALID;
+      RS_STACK_POINTER_REG = RS_INVALID;
       { Frame pointer register }
       { Frame pointer register }
       RS_FRAME_POINTER_REG = RS_R28;
       RS_FRAME_POINTER_REG = RS_R28;
       NR_FRAME_POINTER_REG = NR_R28;
       NR_FRAME_POINTER_REG = NR_R28;
@@ -278,17 +275,6 @@ unit cpubase;
 *****************************************************************************}
 *****************************************************************************}
 
 
     const
     const
-      { Registers which must be saved when calling a routine declared as
-        cppdecl, cdecl, stdcall, safecall, palmossyscall. The registers
-        saved should be the ones as defined in the target ABI and / or GCC.
-
-        This value can be deduced from the CALLED_USED_REGISTERS array in the
-        GCC source.
-      }
-      { on avr, gen_entry/gen_exit code saves/restores registers, so
-        we don't need this array }
-      saved_standard_registers : array[0..0] of tsuperregister =
-        (RS_INVALID);
       { Required parameter alignment when calling a routine declared as
       { Required parameter alignment when calling a routine declared as
         stdcall and cdecl. The alignment value should be the one defined
         stdcall and cdecl. The alignment value should be the one defined
         by GCC or the target ABI.
         by GCC or the target ABI.
@@ -298,9 +284,6 @@ unit cpubase;
       }
       }
       std_param_align = 4;
       std_param_align = 4;
 
 
-      saved_address_registers : array[0..0] of tsuperregister = (RS_INVALID);
-      saved_mm_registers : array[0..0] of tsuperregister = (RS_INVALID);
-
 {*****************************************************************************
 {*****************************************************************************
                                   Helpers
                                   Helpers
 *****************************************************************************}
 *****************************************************************************}
@@ -317,22 +300,19 @@ unit cpubase;
     function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function conditions_equal(const c1, c2: TAsmCond): boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function conditions_equal(const c1, c2: TAsmCond): boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
 
-    function dwarf_reg(r:tregister):byte;
-    function GetHigh(const r : TRegister) : TRegister;
-
-    { returns the next virtual register }
-    function GetNextReg(const r : TRegister) : TRegister;
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal") }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
 
 
-    { returns the last virtual register }
-    function GetLastReg(const r : TRegister) : TRegister;
+    function dwarf_reg(r:tregister):byte;
+    function dwarf_reg_no_error(r:tregister):shortint;
+    function eh_return_data_regno(nr: longint): longint;
 
 
-    { returns the register with the offset of ofs of a continuous set of register starting with r }
-    function GetOffsetReg(const r : TRegister;ofs : shortint) : TRegister;
-    { returns the register with the offset of ofs of a continuous set of register starting with r and being continued with rhi }
-    function GetOffsetReg64(const r,rhi: TRegister;ofs : shortint): TRegister;
 
 
     function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
     function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
 
 
+    function GetDefaultTmpReg : TRegister;
+    function GetDefaultZeroReg : TRegister;
+
   implementation
   implementation
 
 
     uses
     uses
@@ -376,7 +356,7 @@ unit cpubase;
       const
       const
         inv_flags: array[TResFlags] of TResFlags =
         inv_flags: array[TResFlags] of TResFlags =
           (F_NotPossible,F_CS,F_CC,F_NE,F_LT,F_SH,F_GE,
           (F_NotPossible,F_CS,F_CC,F_NE,F_LT,F_SH,F_GE,
-           F_NE,F_LO,F_VS,F_VC);
+           F_NE,F_LO,F_VS,F_VC,F_MI,F_PL);
       begin
       begin
         f:=inv_flags[f];
         f:=inv_flags[f];
       end;
       end;
@@ -384,9 +364,9 @@ unit cpubase;
 
 
     function flags_to_cond(const f: TResFlags) : TAsmCond;
     function flags_to_cond(const f: TResFlags) : TAsmCond;
       const
       const
-        flag_2_cond: array[F_CC..F_VS] of TAsmCond =
+        flag_2_cond: array[F_CC..F_MI] of TAsmCond =
           (C_CC,C_CS,C_EQ,C_GE,C_LO,C_LT,
           (C_CC,C_CS,C_EQ,C_GE,C_LO,C_LT,
-           C_NE,C_SH,C_VC,C_VS);
+           C_NE,C_SH,C_VC,C_VS,C_PL,C_MI);
       begin
       begin
         if f=F_NotPossible then
         if f=F_NotPossible then
           internalerror(2011022101);
           internalerror(2011022101);
@@ -436,6 +416,24 @@ unit cpubase;
       end;
       end;
 
 
 
 
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal"); returns True at once when c is C_None or when conditions_equal(Subset, c) holds }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+      begin
+        Result := (c = C_None) or conditions_equal(Subset, c);
+
+        { Only the pairs listed below are recognised as proper subsets; every other combination yields False. Please update as necessary. [Kit] }
+        if not Result then
+          case Subset of
+            C_EQ:
+              Result := (c in [C_GE]); { "equal" is covered by "greater than or equal" }
+            C_LT:
+              Result := (c in [C_NE]); { "less than" is covered by "not equal" }
+            else
+              Result := False;
+          end;
+      end;
+
+
     function rotl(d : dword;b : byte) : dword;
     function rotl(d : dword;b : byte) : dword;
       begin
       begin
          result:=(d shr (32-b)) or (d shl b);
          result:=(d shr (32-b)) or (d shl b);
@@ -453,42 +451,39 @@ unit cpubase;
       end;
       end;
 
 
 
 
-    function GetHigh(const r : TRegister) : TRegister;
-      begin
-        result:=TRegister(longint(r)+1)
-      end;
-
-
-    function GetNextReg(const r: TRegister): TRegister;
+    function dwarf_reg_no_error(r:tregister):shortint;
       begin
       begin
-        result:=TRegister(longint(r)+1);
+        result:=regdwarf_table[findreg_by_number(r)];
       end;
       end;
 
 
 
 
-    function GetLastReg(const r: TRegister): TRegister;
+    function eh_return_data_regno(nr: longint): longint;
       begin
       begin
-        result:=TRegister(longint(r)-1);
+        result:=-1;
       end;
       end;
 
 
 
 
-    function GetOffsetReg(const r: TRegister;ofs : shortint): TRegister;
+    function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
       begin
       begin
-        result:=TRegister(longint(r)+ofs);
+        is_calljmp:= o in call_jmp_instructions;
       end;
       end;
 
 
 
 
-    function GetOffsetReg64(const r,rhi: TRegister;ofs : shortint): TRegister;
+    function GetDefaultTmpReg: TRegister;
       begin
       begin
-        if ofs>3 then
-          result:=TRegister(longint(rhi)+ofs-4)
+        if CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype] then
+          Result:=NR_R16
         else
         else
-          result:=TRegister(longint(r)+ofs);
+          Result:=NR_R0;
       end;
       end;
 
 
 
 
-    function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
+    function GetDefaultZeroReg: TRegister;
       begin
       begin
-        is_calljmp:= o in call_jmp_instructions;
+        if CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype] then
+          Result:=NR_R17
+        else
+          Result:=NR_R1;
       end;
       end;
 
 
 
 

+ 445 - 287
compiler/avr/cpuinfo.pas

@@ -14,6 +14,8 @@
 
 
 Unit CPUInfo;
 Unit CPUInfo;
 
 
+{$i fpcdefs.inc}
+
 Interface
 Interface
 
 
   uses
   uses
@@ -35,6 +37,7 @@ Type
    { possible supported processors for this target }
    { possible supported processors for this target }
    tcputype =
    tcputype =
       (cpu_none,
       (cpu_none,
+       cpu_avrtiny,
        cpu_avr1,
        cpu_avr1,
        cpu_avr2,
        cpu_avr2,
        cpu_avr25,
        cpu_avr25,
@@ -44,13 +47,14 @@ Type
        cpu_avr4,
        cpu_avr4,
        cpu_avr5,
        cpu_avr5,
        cpu_avr51,
        cpu_avr51,
-       cpu_avr6
+       cpu_avr6,
+       cpu_avrxmega3
       );
       );
 
 
    tfputype =
    tfputype =
      (fpu_none,
      (fpu_none,
       fpu_soft,
       fpu_soft,
-      fp_libgcc
+      fpu_libgcc
      );
      );
 
 
    tcontrollertype =
    tcontrollertype =
@@ -58,150 +62,225 @@ Type
 
 
       ct_avrsim,
       ct_avrsim,
 
 
-      ct_atmega645,
-      ct_atmega165a,
-      ct_attiny44a,
-      ct_atmega649a,
-      ct_atmega32u4,
-      ct_attiny26,
-      ct_at90usb1287,
+      ct_at90can32,
+      ct_at90can64,
+      ct_at90can128,
+      ct_at90pwm1,
+      ct_at90pwm2b,
+      ct_at90pwm3b,
+      ct_at90pwm81,
       ct_at90pwm161,
       ct_at90pwm161,
-      ct_attiny48,
-      ct_atmega168p,
-      ct_attiny10,
-      ct_attiny84a,
+      ct_at90pwm216,
+      ct_at90pwm316,
       ct_at90usb82,
       ct_at90usb82,
-      ct_attiny2313,
-      ct_attiny461,
-      ct_atmega3250pa,
-      ct_atmega3290a,
-      ct_atmega165p,
-      ct_attiny43u,
       ct_at90usb162,
       ct_at90usb162,
-      ct_atmega16u4,
-      ct_attiny24a,
-      ct_atmega88p,
-      ct_attiny88,
-      ct_atmega6490p,
-      ct_attiny40,
-      ct_atmega324p,
-      ct_attiny167,
-      ct_atmega328,
-      ct_attiny861,
-      ct_attiny85,
-      ct_atmega64m1,
-      ct_atmega645p,
-      ct_atmega8u2,
-      ct_atmega329a,
-      ct_atmega8a,
-      ct_atmega324pa,
-      ct_atmega32hvb,
-      ct_at90pwm316,
-      ct_at90pwm3b,
       ct_at90usb646,
       ct_at90usb646,
-      ct_attiny20,
-      ct_atmega16,
-      ct_atmega48a,
-      ct_attiny24,
-      ct_atmega644,
-      ct_atmega1284,
+      ct_at90usb647,
+      ct_at90usb1286,
+      ct_at90usb1287,
       ct_ata6285,
       ct_ata6285,
-      ct_at90can64,
-      ct_atmega48,
-      ct_at90can32,
-      ct_attiny9,
-      ct_attiny87,
-      ct_atmega1281,
-      ct_at90pwm216,
-      ct_atmega3250a,
-      ct_atmega88a,
-      ct_atmega128rfa1,
-      ct_atmega3290pa,
-      ct_at90pwm81,
-      ct_atmega325p,
-      ct_attiny84,
-      ct_atmega328p,
-      ct_attiny13a,
+      ct_ata6286,
       ct_atmega8,
       ct_atmega8,
-      ct_atmega1284p,
+      ct_atmega8a,
+      ct_atmega8hva,
+      ct_atmega8u2,
+      ct_atmega16,
+      ct_atmega16a,
+      ct_atmega16hva,
+      ct_atmega16hvb,
+      ct_atmega16hvbrevb,
+      ct_atmega16m1,
       ct_atmega16u2,
       ct_atmega16u2,
-      ct_attiny45,
-      ct_atmega3250,
-      ct_atmega329,
+      ct_atmega16u4,
+      ct_atmega32,
       ct_atmega32a,
       ct_atmega32a,
-      ct_attiny5,
-      ct_at90can128,
-      ct_atmega6490,
-      ct_atmega8515,
+      ct_atmega32c1,
+      ct_atmega32hvb,
+      ct_atmega32hvbrevb,
+      ct_atmega32m1,
+      ct_atmega32u2,
+      ct_atmega32u4,
+      ct_atmega48,
+      ct_atmega48a,
+      ct_atmega48p,
+      ct_atmega48pa,
+      ct_atmega48pb,
+      ct_atmega64,
+      ct_atmega64a,
+      ct_atmega64c1,
+      ct_atmega64hve2,
+      ct_atmega64m1,
+      ct_atmega64rfr2,
+      ct_atmega88,
+      ct_atmega88a,
+      ct_atmega88p,
       ct_atmega88pa,
       ct_atmega88pa,
-      ct_atmega168a,
+      ct_atmega88pb,
       ct_atmega128,
       ct_atmega128,
-      ct_at90usb1286,
-      ct_atmega164pa,
-      ct_attiny828,
-      ct_atmega88,
-      ct_atmega645a,
-      ct_atmega3290p,
-      ct_atmega644p,
-      ct_atmega164a,
-      ct_attiny4313,
-      ct_atmega162,
-      ct_atmega32c1,
       ct_atmega128a,
       ct_atmega128a,
-      ct_atmega324a,
-      ct_attiny13,
-      ct_atmega2561,
+      ct_atmega128rfa1,
+      ct_atmega128rfr2,
+      ct_atmega162,
+      ct_atmega164a,
+      ct_atmega164p,
+      ct_atmega164pa,
+      ct_atmega165a,
+      ct_atmega165p,
+      ct_atmega165pa,
+      ct_atmega168,
+      ct_atmega168a,
+      ct_atmega168p,
+      ct_atmega168pa,
+      ct_atmega168pb,
       ct_atmega169a,
       ct_atmega169a,
-      ct_attiny261,
-      ct_atmega644a,
-      ct_atmega3290,
-      ct_atmega64a,
       ct_atmega169p,
       ct_atmega169p,
-      ct_atmega2560,
-      ct_atmega32,
-      ct_attiny861a,
-      ct_attiny28,
-      ct_atmega48p,
-      ct_atmega8535,
-      ct_atmega168pa,
-      ct_atmega16m1,
-      ct_atmega16hvb,
-      ct_atmega164p,
+      ct_atmega169pa,
+      ct_atmega256rfr2,
+      ct_atmega324a,
+      ct_atmega324p,
+      ct_atmega324pa,
+      ct_atmega324pb,
+      ct_atmega325,
       ct_atmega325a,
       ct_atmega325a,
+      ct_atmega325p,
+      ct_atmega325pa,
+      ct_atmega328,
+      ct_atmega328p,
+      ct_atmega328pb,
+      ct_atmega329,
+      ct_atmega329a,
+      ct_atmega329p,
+      ct_atmega329pa,
+      ct_atmega406,
       ct_atmega640,
       ct_atmega640,
+      ct_atmega644,
+      ct_atmega644a,
+      ct_atmega644p,
+      ct_atmega644pa,
+      ct_atmega644rfr2,
+      ct_atmega645,
+      ct_atmega645a,
+      ct_atmega645p,
+      ct_atmega649,
+      ct_atmega649a,
+      ct_atmega649p,
+      ct_atmega808,
+      ct_atmega809,
+      ct_atmega1280,
+      ct_atmega1281,
+      ct_atmega1284,
+      ct_atmega1284p,
+      ct_atmega1284rfr2,
+      ct_atmega1608,
+      ct_atmega1609,
+      ct_atmega2560,
+      ct_atmega2561,
+      ct_atmega2564rfr2,
+      ct_atmega3208,
+      ct_atmega3209,
+      ct_atmega3250,
+      ct_atmega3250a,
+      ct_atmega3250p,
+      ct_atmega3250pa,
+      ct_atmega3290,
+      ct_atmega3290a,
+      ct_atmega3290p,
+      ct_atmega3290pa,
+      ct_atmega4808,
+      ct_atmega4809,
       ct_atmega6450,
       ct_atmega6450,
-      ct_atmega329p,
-      ct_ata6286,
-      ct_at90usb647,
-      ct_atmega168,
+      ct_atmega6450a,
+      ct_atmega6450p,
+      ct_atmega6490,
       ct_atmega6490a,
       ct_atmega6490a,
-      ct_atmega32m1,
-      ct_atmega64c1,
-      ct_atmega32u2,
+      ct_atmega6490p,
+      ct_atmega8515,
+      ct_atmega8535,
       ct_attiny4,
       ct_attiny4,
-      ct_atmega644pa,
-      ct_at90pwm1,
+      ct_attiny5,
+      ct_attiny9,
+      ct_attiny10,
+      ct_attiny11,
+      ct_attiny12,
+      ct_attiny13,
+      ct_attiny13a,
+      ct_attiny15,
+      ct_attiny20,
+      ct_attiny24,
+      ct_attiny24a,
+      ct_attiny25,
+      ct_attiny26,
+      ct_attiny28,
+      ct_attiny40,
+      ct_attiny43u,
       ct_attiny44,
       ct_attiny44,
-      ct_atmega325pa,
-      ct_atmega6450a,
-      ct_attiny2313a,
-      ct_atmega329pa,
+      ct_attiny44a,
+      ct_attiny45,
+      ct_attiny48,
+      ct_attiny84,
+      ct_attiny84a,
+      ct_attiny85,
+      ct_attiny87,
+      ct_attiny88,
+      ct_attiny102,
+      ct_attiny104,
+      ct_attiny167,
+      ct_attiny202,
+      ct_attiny204,
+      ct_attiny212,
+      ct_attiny214,
+      ct_attiny261,
+      ct_attiny261a,
+      ct_attiny402,
+      ct_attiny404,
+      ct_attiny406,
+      ct_attiny412,
+      ct_attiny414,
+      ct_attiny416,
+      ct_attiny416auto,
+      ct_attiny417,
+      ct_attiny441,
+      ct_attiny461,
       ct_attiny461a,
       ct_attiny461a,
-      ct_atmega6450p,
-      ct_atmega64,
-      ct_atmega165pa,
-      ct_atmega16a,
-      ct_atmega649,
-      ct_atmega1280,
-      ct_at90pwm2b,
-      ct_atmega649p,
-      ct_atmega3250p,
-      ct_atmega48pa,
+      ct_attiny804,
+      ct_attiny806,
+      ct_attiny807,
+      ct_attiny814,
+      ct_attiny816,
+      ct_attiny817,
+      ct_attiny828,
+      ct_attiny841,
+      ct_attiny861,
+      ct_attiny861a,
+      ct_attiny1604,
+      ct_attiny1606,
+      ct_attiny1607,
+      ct_attiny1614,
+      ct_attiny1616,
+      ct_attiny1617,
+      ct_attiny1624,
+      ct_attiny1626,
+      ct_attiny1627,
       ct_attiny1634,
       ct_attiny1634,
-      ct_atmega325,
-      ct_atmega169pa,
-      ct_attiny261a,
-      ct_attiny25
+      ct_attiny2313,
+      ct_attiny2313a,
+      ct_attiny3214,
+      ct_attiny3216,
+      ct_attiny3217,
+      ct_attiny4313,
+      // Controller board aliases
+      ct_arduinoleonardo,
+      ct_arduinomega,
+      ct_arduinomicro,
+      ct_arduinonano,
+      ct_arduinonanoevery,
+      ct_arduinouno,
+      ct_atmega256rfr2xpro,
+      ct_atmega324pbxpro,
+      ct_atmega1284pxplained,
+      ct_atmega4809xpro,
+      ct_attiny817xpro,
+      ct_attiny3217xpro
      );
      );
 
 
    tcontrollerdatatype = record
    tcontrollerdatatype = record
@@ -216,8 +295,6 @@ Const
    ControllerSupport = true;
    ControllerSupport = true;
    {# Size of native extended floating point type }
    {# Size of native extended floating point type }
    extended_size = 12;
    extended_size = 12;
-   {# Size of a multimedia register               }
-   mmreg_size = 16;
    { target cpu string (used by compiler options) }
    { target cpu string (used by compiler options) }
    target_cpu_string = 'avr';
    target_cpu_string = 'avr';
 
 
@@ -234,7 +311,8 @@ Const
      pocall_softfloat
      pocall_softfloat
    ];
    ];
 
 
-   cputypestr : array[tcputype] of string[5] = ('',
+   cputypestr : array[tcputype] of string[9] = ('',
+     'AVRTINY',
      'AVR1',
      'AVR1',
      'AVR2',
      'AVR2',
      'AVR25',
      'AVR25',
@@ -244,7 +322,8 @@ Const
      'AVR4',
      'AVR4',
      'AVR5',
      'AVR5',
      'AVR51',
      'AVR51',
-     'AVR6'
+     'AVR6',
+     'AVRXMEGA3'
    );
    );
 
 
    fputypestr : array[tfputype] of string[6] = (
    fputypestr : array[tfputype] of string[6] = (
@@ -282,150 +361,225 @@ Const
         eeprombase:0;
         eeprombase:0;
         eepromsize:4096;
         eepromsize:4096;
         )
         )
-        ,(controllertypestr:'ATMEGA645'; controllerunitstr:'ATMEGA645'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA165A'; controllerunitstr:'ATMEGA165A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY44A'; controllerunitstr:'ATTINY44A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA649A'; controllerunitstr:'ATMEGA649A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA32U4'; controllerunitstr:'ATMEGA32U4'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2560; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY26'; controllerunitstr:'ATTINY26'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'AT90USB1287'; controllerunitstr:'AT90USB1287'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'AT90PWM161'; controllerunitstr:'AT90PWM161'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY48'; controllerunitstr:'ATTINY48'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:256; sramsize:256; eeprombase:0; eepromsize:64)
-        ,(controllertypestr:'ATMEGA168P'; controllerunitstr:'ATMEGA168P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY10'; controllerunitstr:'ATTINY10'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:1024; srambase:64; sramsize:32; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATTINY84A'; controllerunitstr:'ATTINY84A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'AT90USB82'; controllerunitstr:'AT90USB82'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY2313'; controllerunitstr:'ATTINY2313'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATTINY461'; controllerunitstr:'ATTINY461'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA3250PA'; controllerunitstr:'ATMEGA3250PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA3290A'; controllerunitstr:'ATMEGA3290A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA165P'; controllerunitstr:'ATMEGA165P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY43U'; controllerunitstr:'ATTINY43U'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:64)
-        ,(controllertypestr:'AT90USB162'; controllerunitstr:'AT90USB162'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA16U4'; controllerunitstr:'ATMEGA16U4'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1280; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY24A'; controllerunitstr:'ATTINY24A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATMEGA88P'; controllerunitstr:'ATMEGA88P'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY88'; controllerunitstr:'ATTINY88'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:64)
-        ,(controllertypestr:'ATMEGA6490P'; controllerunitstr:'ATMEGA6490P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATTINY40'; controllerunitstr:'ATTINY40'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:64; sramsize:256; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATMEGA324P'; controllerunitstr:'ATMEGA324P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY167'; controllerunitstr:'ATTINY167'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA328'; controllerunitstr:'ATMEGA328'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY861'; controllerunitstr:'ATTINY861'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY85'; controllerunitstr:'ATTINY85'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA64M1'; controllerunitstr:'ATMEGA64M1'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA645P'; controllerunitstr:'ATMEGA645P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA8U2'; controllerunitstr:'ATMEGA8U2'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA329A'; controllerunitstr:'ATMEGA329A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA8A'; controllerunitstr:'ATMEGA8A'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA324PA'; controllerunitstr:'ATMEGA324PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA32HVB'; controllerunitstr:'ATMEGA32HVB'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'AT90PWM316'; controllerunitstr:'AT90PWM316'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'AT90PWM3B'; controllerunitstr:'AT90PWM3B'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'AT90USB646'; controllerunitstr:'AT90USB646'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATTINY20'; controllerunitstr:'ATTINY20'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:64; sramsize:128; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATMEGA16'; controllerunitstr:'ATMEGA16'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:96; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA48A'; controllerunitstr:'ATMEGA48A'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:256; sramsize:512; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATTINY24'; controllerunitstr:'ATTINY24'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATMEGA644'; controllerunitstr:'ATMEGA644'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA1284'; controllerunitstr:'ATMEGA1284'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:16384; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATA6285'; controllerunitstr:'ATA6285'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:320)
-        ,(controllertypestr:'AT90CAN64'; controllerunitstr:'AT90CAN64'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA48'; controllerunitstr:'ATMEGA48'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:256; sramsize:512; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'AT90CAN32'; controllerunitstr:'AT90CAN32'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY9'; controllerunitstr:'ATTINY9'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:1024; srambase:64; sramsize:32; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATTINY87'; controllerunitstr:'ATTINY87'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA1281'; controllerunitstr:'ATMEGA1281'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'AT90PWM216'; controllerunitstr:'AT90PWM216'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA3250A'; controllerunitstr:'ATMEGA3250A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA88A'; controllerunitstr:'ATMEGA88A'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA128RFA1'; controllerunitstr:'ATMEGA128RFA1'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:512; sramsize:16384; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA3290PA'; controllerunitstr:'ATMEGA3290PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'AT90PWM81'; controllerunitstr:'AT90PWM81'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:256; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA325P'; controllerunitstr:'ATMEGA325P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY84'; controllerunitstr:'ATTINY84'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA328P'; controllerunitstr:'ATMEGA328P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY13A'; controllerunitstr:'ATTINY13A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:1024; srambase:96; sramsize:64; eeprombase:0; eepromsize:64)
-        ,(controllertypestr:'ATMEGA8'; controllerunitstr:'ATMEGA8'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA1284P'; controllerunitstr:'ATMEGA1284P'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:16384; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA16U2'; controllerunitstr:'ATMEGA16U2'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY45'; controllerunitstr:'ATTINY45'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA3250'; controllerunitstr:'ATMEGA3250'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA329'; controllerunitstr:'ATMEGA329'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA32A'; controllerunitstr:'ATMEGA32A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:96; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY5'; controllerunitstr:'ATTINY5'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:512; srambase:64; sramsize:32; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'AT90CAN128'; controllerunitstr:'AT90CAN128'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:4096; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA6490'; controllerunitstr:'ATMEGA6490'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA8515'; controllerunitstr:'ATMEGA8515'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA88PA'; controllerunitstr:'ATMEGA88PA'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA168A'; controllerunitstr:'ATMEGA168A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA128'; controllerunitstr:'ATMEGA128'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:4096; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'AT90USB1286'; controllerunitstr:'AT90USB1286'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA164PA'; controllerunitstr:'ATMEGA164PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY828'; controllerunitstr:'ATTINY828'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA88'; controllerunitstr:'ATMEGA88'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA645A'; controllerunitstr:'ATMEGA645A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA3290P'; controllerunitstr:'ATMEGA3290P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA644P'; controllerunitstr:'ATMEGA644P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA164A'; controllerunitstr:'ATMEGA164A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY4313'; controllerunitstr:'ATTINY4313'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA162'; controllerunitstr:'ATMEGA162'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA32C1'; controllerunitstr:'ATMEGA32C1'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA128A'; controllerunitstr:'ATMEGA128A'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:4096; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA324A'; controllerunitstr:'ATMEGA324A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY13'; controllerunitstr:'ATTINY13'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:1024; srambase:96; sramsize:64; eeprombase:0; eepromsize:64)
-        ,(controllertypestr:'ATMEGA2561'; controllerunitstr:'ATMEGA2561'; cputype: cpu_avr6; fputype:fpu_soft; flashbase:0; flashsize:262144; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA169A'; controllerunitstr:'ATMEGA169A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY261'; controllerunitstr:'ATTINY261'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATMEGA644A'; controllerunitstr:'ATMEGA644A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA3290'; controllerunitstr:'ATMEGA3290'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA64A'; controllerunitstr:'ATMEGA64A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA169P'; controllerunitstr:'ATMEGA169P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA2560'; controllerunitstr:'ATMEGA2560'; cputype: cpu_avr6; fputype:fpu_soft; flashbase:0; flashsize:262144; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA32'; controllerunitstr:'ATMEGA32'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:96; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY861A'; controllerunitstr:'ATTINY861A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY28'; controllerunitstr:'ATTINY28'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:0; sramsize:0; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATMEGA48P'; controllerunitstr:'ATMEGA48P'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:256; sramsize:512; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA8535'; controllerunitstr:'ATMEGA8535'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA168PA'; controllerunitstr:'ATMEGA168PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA16M1'; controllerunitstr:'ATMEGA16M1'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA16HVB'; controllerunitstr:'ATMEGA16HVB'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA164P'; controllerunitstr:'ATMEGA164P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA325A'; controllerunitstr:'ATMEGA325A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA640'; controllerunitstr:'ATMEGA640'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA6450'; controllerunitstr:'ATMEGA6450'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA329P'; controllerunitstr:'ATMEGA329P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATA6286'; controllerunitstr:'ATA6286'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:320)
-        ,(controllertypestr:'AT90USB647'; controllerunitstr:'AT90USB647'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA168'; controllerunitstr:'ATMEGA168'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA6490A'; controllerunitstr:'ATMEGA6490A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA32M1'; controllerunitstr:'ATMEGA32M1'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA64C1'; controllerunitstr:'ATMEGA64C1'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA32U2'; controllerunitstr:'ATMEGA32U2'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:1024; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY4'; controllerunitstr:'ATTINY4'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:512; srambase:64; sramsize:32; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATMEGA644PA'; controllerunitstr:'ATMEGA644PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'AT90PWM1'; controllerunitstr:'AT90PWM1'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY44'; controllerunitstr:'ATTINY44'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA325PA'; controllerunitstr:'ATMEGA325PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA6450A'; controllerunitstr:'ATMEGA6450A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATTINY2313A'; controllerunitstr:'ATTINY2313A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATMEGA329PA'; controllerunitstr:'ATMEGA329PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY461A'; controllerunitstr:'ATTINY461A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA6450P'; controllerunitstr:'ATMEGA6450P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA64'; controllerunitstr:'ATMEGA64'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA165PA'; controllerunitstr:'ATMEGA165PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA16A'; controllerunitstr:'ATMEGA16A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:96; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA649'; controllerunitstr:'ATMEGA649'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA1280'; controllerunitstr:'ATMEGA1280'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'AT90PWM2B'; controllerunitstr:'AT90PWM2B'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA649P'; controllerunitstr:'ATMEGA649P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA3250P'; controllerunitstr:'ATMEGA3250P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA48PA'; controllerunitstr:'ATMEGA48PA'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:256; sramsize:512; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATTINY1634'; controllerunitstr:'ATTINY1634'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA325'; controllerunitstr:'ATMEGA325'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA169PA'; controllerunitstr:'ATMEGA169PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY261A'; controllerunitstr:'ATTINY261A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATTINY25'; controllerunitstr:'ATTINY25'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
+        ,(controllertypestr:'AT90CAN32';controllerunitstr:'AT90CAN32';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'AT90CAN64';controllerunitstr:'AT90CAN64';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'AT90CAN128';controllerunitstr:'AT90CAN128';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:4096;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'AT90PWM1';controllerunitstr:'AT90PWM1';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM2B';controllerunitstr:'AT90PWM2B';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM3B';controllerunitstr:'AT90PWM3B';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM81';controllerunitstr:'AT90PWM81';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:256;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM161';controllerunitstr:'AT90PWM161';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM216';controllerunitstr:'AT90PWM216';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM316';controllerunitstr:'AT90PWM316';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90USB82';controllerunitstr:'AT90USB82';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90USB162';controllerunitstr:'AT90USB162';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90USB646';controllerunitstr:'AT90USB646';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'AT90USB647';controllerunitstr:'AT90USB647';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'AT90USB1286';controllerunitstr:'AT90USB1286';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'AT90USB1287';controllerunitstr:'AT90USB1287';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATA6285';controllerunitstr:'ATA6285';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:320)
+        ,(controllertypestr:'ATA6286';controllerunitstr:'ATA6286';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:320)
+        ,(controllertypestr:'ATMEGA8';controllerunitstr:'ATMEGA8';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA8A';controllerunitstr:'ATMEGA8A';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA8HVA';controllerunitstr:'ATMEGA8HVA';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA8U2';controllerunitstr:'ATMEGA8U2';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16';controllerunitstr:'ATMEGA16';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:96;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16A';controllerunitstr:'ATMEGA16A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:96;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16HVA';controllerunitstr:'ATMEGA16HVA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA16HVB';controllerunitstr:'ATMEGA16HVB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16HVBREVB';controllerunitstr:'ATMEGA16HVBREVB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16M1';controllerunitstr:'ATMEGA16M1';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16U2';controllerunitstr:'ATMEGA16U2';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16U4';controllerunitstr:'ATMEGA16U4';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1280;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA32';controllerunitstr:'ATMEGA32';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:96;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32A';controllerunitstr:'ATMEGA32A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:96;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32C1';controllerunitstr:'ATMEGA32C1';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32HVB';controllerunitstr:'ATMEGA32HVB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32HVBREVB';controllerunitstr:'ATMEGA32HVBREVB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32M1';controllerunitstr:'ATMEGA32M1';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32U2';controllerunitstr:'ATMEGA32U2';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:1024;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32U4';controllerunitstr:'ATMEGA32U4';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2560;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA48';controllerunitstr:'ATMEGA48';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA48A';controllerunitstr:'ATMEGA48A';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA48P';controllerunitstr:'ATMEGA48P';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA48PA';controllerunitstr:'ATMEGA48PA';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA48PB';controllerunitstr:'ATMEGA48PB';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA64';controllerunitstr:'ATMEGA64';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA64A';controllerunitstr:'ATMEGA64A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA64C1';controllerunitstr:'ATMEGA64C1';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA64HVE2';controllerunitstr:'ATMEGA64HVE2';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA64M1';controllerunitstr:'ATMEGA64M1';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA64RFR2';controllerunitstr:'ATMEGA64RFR2';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:512;sramsize:8192;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA88';controllerunitstr:'ATMEGA88';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA88A';controllerunitstr:'ATMEGA88A';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA88P';controllerunitstr:'ATMEGA88P';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA88PA';controllerunitstr:'ATMEGA88PA';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA88PB';controllerunitstr:'ATMEGA88PB';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA128';controllerunitstr:'ATMEGA128';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:4096;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA128A';controllerunitstr:'ATMEGA128A';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:4096;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA128RFA1';controllerunitstr:'ATMEGA128RFA1';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:512;sramsize:16384;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA128RFR2';controllerunitstr:'ATMEGA128RFR2';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:512;sramsize:16384;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA162';controllerunitstr:'ATMEGA162';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA164A';controllerunitstr:'ATMEGA164A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA164P';controllerunitstr:'ATMEGA164P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA164PA';controllerunitstr:'ATMEGA164PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA165A';controllerunitstr:'ATMEGA165A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA165P';controllerunitstr:'ATMEGA165P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA165PA';controllerunitstr:'ATMEGA165PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA168';controllerunitstr:'ATMEGA168';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA168A';controllerunitstr:'ATMEGA168A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA168P';controllerunitstr:'ATMEGA168P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA168PA';controllerunitstr:'ATMEGA168PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA168PB';controllerunitstr:'ATMEGA168PB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA169A';controllerunitstr:'ATMEGA169A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA169P';controllerunitstr:'ATMEGA169P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA169PA';controllerunitstr:'ATMEGA169PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA256RFR2';controllerunitstr:'ATMEGA256RFR2';cputype:cpu_avr6;fputype:fpu_soft;flashbase:0;flashsize:262144;srambase:512;sramsize:32768;eeprombase:0;eepromsize:8192)
+        ,(controllertypestr:'ATMEGA324A';controllerunitstr:'ATMEGA324A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA324P';controllerunitstr:'ATMEGA324P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA324PA';controllerunitstr:'ATMEGA324PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA324PB';controllerunitstr:'ATMEGA324PB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA325';controllerunitstr:'ATMEGA325';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA325A';controllerunitstr:'ATMEGA325A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA325P';controllerunitstr:'ATMEGA325P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA325PA';controllerunitstr:'ATMEGA325PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA328';controllerunitstr:'ATMEGA328';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA328P';controllerunitstr:'ATMEGA328P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA328PB';controllerunitstr:'ATMEGA328PB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA329';controllerunitstr:'ATMEGA329';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA329A';controllerunitstr:'ATMEGA329A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA329P';controllerunitstr:'ATMEGA329P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA329PA';controllerunitstr:'ATMEGA329PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA406';controllerunitstr:'ATMEGA406';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:40960;srambase:256;sramsize:2048;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA640';controllerunitstr:'ATMEGA640';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:512;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA644';controllerunitstr:'ATMEGA644';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA644A';controllerunitstr:'ATMEGA644A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA644P';controllerunitstr:'ATMEGA644P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA644PA';controllerunitstr:'ATMEGA644PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA644RFR2';controllerunitstr:'ATMEGA644RFR2';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:512;sramsize:8192;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA645';controllerunitstr:'ATMEGA645';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA645A';controllerunitstr:'ATMEGA645A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA645P';controllerunitstr:'ATMEGA645P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA649';controllerunitstr:'ATMEGA649';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA649A';controllerunitstr:'ATMEGA649A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA649P';controllerunitstr:'ATMEGA649P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA808';controllerunitstr:'ATMEGA808';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15360;sramsize:1024;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA809';controllerunitstr:'ATMEGA809';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15360;sramsize:1024;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA1280';controllerunitstr:'ATMEGA1280';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:512;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA1281';controllerunitstr:'ATMEGA1281';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:512;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA1284';controllerunitstr:'ATMEGA1284';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:16384;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA1284P';controllerunitstr:'ATMEGA1284P';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:16384;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA1284RFR2';controllerunitstr:'ATMEGA1284RFR2';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:512;sramsize:16384;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA1608';controllerunitstr:'ATMEGA1608';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA1609';controllerunitstr:'ATMEGA1609';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA2560';controllerunitstr:'ATMEGA2560';cputype:cpu_avr6;fputype:fpu_soft;flashbase:0;flashsize:262144;srambase:512;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA2561';controllerunitstr:'ATMEGA2561';cputype:cpu_avr6;fputype:fpu_soft;flashbase:0;flashsize:262144;srambase:512;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA2564RFR2';controllerunitstr:'ATMEGA2564RFR2';cputype:cpu_avr6;fputype:fpu_soft;flashbase:0;flashsize:262144;srambase:512;sramsize:32768;eeprombase:0;eepromsize:8192)
+        ,(controllertypestr:'ATMEGA3208';controllerunitstr:'ATMEGA3208';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:12288;sramsize:4096;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA3209';controllerunitstr:'ATMEGA3209';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:12288;sramsize:4096;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA3250';controllerunitstr:'ATMEGA3250';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3250A';controllerunitstr:'ATMEGA3250A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3250P';controllerunitstr:'ATMEGA3250P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3250PA';controllerunitstr:'ATMEGA3250PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3290';controllerunitstr:'ATMEGA3290';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3290A';controllerunitstr:'ATMEGA3290A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3290P';controllerunitstr:'ATMEGA3290P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3290PA';controllerunitstr:'ATMEGA3290PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA4808';controllerunitstr:'ATMEGA4808';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:49152;srambase:10240;sramsize:6144;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA4809';controllerunitstr:'ATMEGA4809';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:49152;srambase:10240;sramsize:6144;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA6450';controllerunitstr:'ATMEGA6450';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA6450A';controllerunitstr:'ATMEGA6450A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA6450P';controllerunitstr:'ATMEGA6450P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA6490';controllerunitstr:'ATMEGA6490';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA6490A';controllerunitstr:'ATMEGA6490A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA6490P';controllerunitstr:'ATMEGA6490P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA8515';controllerunitstr:'ATMEGA8515';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA8535';controllerunitstr:'ATMEGA8535';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY4';controllerunitstr:'ATTINY4';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:512;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY5';controllerunitstr:'ATTINY5';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:512;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY9';controllerunitstr:'ATTINY9';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY10';controllerunitstr:'ATTINY10';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY11';controllerunitstr:'ATTINY11';cputype:cpu_avr1;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:0;sramsize:0;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY12';controllerunitstr:'ATTINY12';cputype:cpu_avr1;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:0;sramsize:0;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY13';controllerunitstr:'ATTINY13';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:96;sramsize:64;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY13A';controllerunitstr:'ATTINY13A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:96;sramsize:64;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY15';controllerunitstr:'ATTINY15';cputype:cpu_avr1;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:0;sramsize:0;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY20';controllerunitstr:'ATTINY20';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:64;sramsize:128;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY24';controllerunitstr:'ATTINY24';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY24A';controllerunitstr:'ATTINY24A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY25';controllerunitstr:'ATTINY25';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY26';controllerunitstr:'ATTINY26';cputype:cpu_avr2;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY28';controllerunitstr:'ATTINY28';cputype:cpu_avr1;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:0;sramsize:0;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY40';controllerunitstr:'ATTINY40';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:64;sramsize:256;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY43U';controllerunitstr:'ATTINY43U';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY44';controllerunitstr:'ATTINY44';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY44A';controllerunitstr:'ATTINY44A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY45';controllerunitstr:'ATTINY45';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY48';controllerunitstr:'ATTINY48';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:256;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY84';controllerunitstr:'ATTINY84';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY84A';controllerunitstr:'ATTINY84A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY85';controllerunitstr:'ATTINY85';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY87';controllerunitstr:'ATTINY87';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY88';controllerunitstr:'ATTINY88';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY102';controllerunitstr:'ATTINY102';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY104';controllerunitstr:'ATTINY104';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY167';controllerunitstr:'ATTINY167';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY202';controllerunitstr:'ATTINY202';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:16256;sramsize:128;eeprombase:5120;eepromsize:64)
+        ,(controllertypestr:'ATTINY204';controllerunitstr:'ATTINY204';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:16256;sramsize:128;eeprombase:5120;eepromsize:64)
+        ,(controllertypestr:'ATTINY212';controllerunitstr:'ATTINY212';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:16256;sramsize:128;eeprombase:5120;eepromsize:64)
+        ,(controllertypestr:'ATTINY214';controllerunitstr:'ATTINY214';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:16256;sramsize:128;eeprombase:5120;eepromsize:64)
+        ,(controllertypestr:'ATTINY261';controllerunitstr:'ATTINY261';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY261A';controllerunitstr:'ATTINY261A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY402';controllerunitstr:'ATTINY402';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY404';controllerunitstr:'ATTINY404';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY406';controllerunitstr:'ATTINY406';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY412';controllerunitstr:'ATTINY412';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY414';controllerunitstr:'ATTINY414';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY416';controllerunitstr:'ATTINY416';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY416AUTO';controllerunitstr:'ATTINY416AUTO';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY417';controllerunitstr:'ATTINY417';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY441';controllerunitstr:'ATTINY441';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY461';controllerunitstr:'ATTINY461';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY461A';controllerunitstr:'ATTINY461A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY804';controllerunitstr:'ATTINY804';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY806';controllerunitstr:'ATTINY806';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY807';controllerunitstr:'ATTINY807';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY814';controllerunitstr:'ATTINY814';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY816';controllerunitstr:'ATTINY816';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY817';controllerunitstr:'ATTINY817';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY828';controllerunitstr:'ATTINY828';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY841';controllerunitstr:'ATTINY841';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY861';controllerunitstr:'ATTINY861';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY861A';controllerunitstr:'ATTINY861A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY1604';controllerunitstr:'ATTINY1604';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:15360;sramsize:1024;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1606';controllerunitstr:'ATTINY1606';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:15360;sramsize:1024;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1607';controllerunitstr:'ATTINY1607';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:15360;sramsize:1024;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1614';controllerunitstr:'ATTINY1614';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1616';controllerunitstr:'ATTINY1616';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1617';controllerunitstr:'ATTINY1617';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1624';controllerunitstr:'ATTINY1624';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1626';controllerunitstr:'ATTINY1626';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1627';controllerunitstr:'ATTINY1627';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1634';controllerunitstr:'ATTINY1634';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY2313';controllerunitstr:'ATTINY2313';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY2313A';controllerunitstr:'ATTINY2313A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY3214';controllerunitstr:'ATTINY3214';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY3216';controllerunitstr:'ATTINY3216';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY3217';controllerunitstr:'ATTINY3217';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY4313';controllerunitstr:'ATTINY4313';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        // Controller board aliases
+        ,(controllertypestr:'ARDUINOLEONARDO'; controllerunitstr:'ATMEGA32U4'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2560; eeprombase:0; eepromsize:1024)
+        ,(controllertypestr:'ARDUINOMEGA'; controllerunitstr:'ATMEGA2560'; cputype: cpu_avr6; fputype:fpu_soft; flashbase:0; flashsize:262144; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
+        ,(controllertypestr:'ARDUINOMICRO'; controllerunitstr:'ATMEGA32U4'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2560; eeprombase:0; eepromsize:1024)
+        ,(controllertypestr:'ARDUINONANO'; controllerunitstr:'ATMEGA328P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
+        ,(controllertypestr:'ARDUINONANOEVERY'; controllerunitstr:'ATMEGA4809'; cputype: cpu_avrxmega3; fputype:fpu_soft; flashbase:0; flashsize:49152; srambase:10240; sramsize:6144; eeprombase:5120; eepromsize:256)
+        ,(controllertypestr:'ARDUINOUNO'; controllerunitstr:'ATMEGA328P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
+        ,(controllertypestr:'ATMEGA256RFR2XPRO';controllerunitstr:'ATMEGA256RFR2';cputype:cpu_avr6;fputype:fpu_soft;flashbase:0;flashsize:262144;srambase:512;sramsize:32768;eeprombase:0;eepromsize:8192)
+        ,(controllertypestr:'ATMEGA324PBXPRO'; controllerunitstr:'ATMEGA324PB'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
+        ,(controllertypestr:'ATMEGA1284PXPLAINED'; controllerunitstr:'ATMEGA1284P'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:16384; eeprombase:0; eepromsize:4096)
+        ,(controllertypestr:'ATMEGA4809XPRO'; controllerunitstr:'ATMEGA4809'; cputype: cpu_avrxmega3; fputype:fpu_soft; flashbase:0; flashsize:49152; srambase:10240; sramsize:6144; eeprombase:5120; eepromsize:256)
+        ,(controllertypestr:'ATTINY817XPRO'; controllerunitstr:'ATTINY817'; cputype: cpu_avrxmega3; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:15872; sramsize:512; eeprombase:5120; eepromsize:128)
+        ,(controllertypestr:'ATTINY3217XPRO'; controllerunitstr:'ATTINY3217'; cputype: cpu_avrxmega3; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:14336; sramsize:2048; eeprombase:5120; eepromsize:256)
    );
    );
 
 
    { Supported optimizations, only used for information }
    { Supported optimizations, only used for information }
@@ -434,12 +588,12 @@ Const
                                  genericlevel3optimizerswitches-
                                  genericlevel3optimizerswitches-
                                  { no need to write info about those }
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
-                                 [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,
+                                 [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_loopunroll,cs_opt_tailrecursion,
                                   cs_opt_stackframe,cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
                                   cs_opt_stackframe,cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
 
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
-     [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion];
+     [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_stackframe,cs_opt_tailrecursion];
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
 
 
@@ -453,22 +607,26 @@ Const
        CPUAVR_HAS_ELPM,
        CPUAVR_HAS_ELPM,
        CPUAVR_HAS_ELPMX,
        CPUAVR_HAS_ELPMX,
        CPUAVR_2_BYTE_PC,
        CPUAVR_2_BYTE_PC,
-       CPUAVR_3_BYTE_PC
+       CPUAVR_3_BYTE_PC,
+       CPUAVR_16_REGS,
+       CPUAVR_NOMEMMAPPED_REGS
       );
       );
 
 
  const
  const
    cpu_capabilities : array[tcputype] of set of tcpuflags =
    cpu_capabilities : array[tcputype] of set of tcpuflags =
-     ( { cpu_none  } [],
-       { cpu_avr1  } [CPUAVR_2_BYTE_PC],
-       { cpu_avr2  } [CPUAVR_2_BYTE_PC],
-       { cpu_avr25 } [CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_2_BYTE_PC],
-       { cpu_avr3  } [CPUAVR_HAS_JMP_CALL,CPUAVR_2_BYTE_PC],
-       { cpu_avr31 } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_2_BYTE_PC],
-       { cpu_avr35 } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_2_BYTE_PC],
-       { cpu_avr4  } [CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_2_BYTE_PC],
-       { cpu_avr5  } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_2_BYTE_PC],
-       { cpu_avr51 } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_HAS_ELPMX,CPUAVR_2_BYTE_PC],
-       { cpu_avr6  } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_HAS_ELPMX,CPUAVR_3_BYTE_PC]
+     ( { cpu_none      } [],
+       { cpu_avrtiny   } [CPUAVR_16_REGS,CPUAVR_2_BYTE_PC,CPUAVR_NOMEMMAPPED_REGS],
+       { cpu_avr1      } [CPUAVR_2_BYTE_PC],
+       { cpu_avr2      } [CPUAVR_HAS_LPMX,CPUAVR_2_BYTE_PC],
+       { cpu_avr25     } [CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_2_BYTE_PC],
+       { cpu_avr3      } [CPUAVR_HAS_JMP_CALL,CPUAVR_2_BYTE_PC],
+       { cpu_avr31     } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_2_BYTE_PC],
+       { cpu_avr35     } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_2_BYTE_PC],
+       { cpu_avr4      } [CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_2_BYTE_PC],
+       { cpu_avr5      } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_2_BYTE_PC],
+       { cpu_avr51     } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_HAS_ELPMX,CPUAVR_2_BYTE_PC],
+       { cpu_avr6      } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_HAS_ELPMX,CPUAVR_3_BYTE_PC],
+       { cpu_avrxmega3 } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_2_BYTE_PC,CPUAVR_NOMEMMAPPED_REGS]
      );
      );
 
 
 Implementation
 Implementation

+ 1 - 0
compiler/avr/cpunode.pas

@@ -37,6 +37,7 @@ unit cpunode;
        ,navradd
        ,navradd
        ,navrmat
        ,navrmat
        ,navrcnv
        ,navrcnv
+       ,navrinl
        ,navrmem
        ,navrmem
        ,navrutil,
        ,navrutil,
        { symtable }
        { symtable }

+ 38 - 21
compiler/avr/cpupara.pas

@@ -39,7 +39,7 @@ unit cpupara;
           function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
           function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
           function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
           function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
-          function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
+          function create_varargs_paraloc_info(p : tabstractprocdef; side: tcallercallee; varargspara:tvarargsparalist):longint;override;
           function  get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
           function  get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
          private
          private
           procedure init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword);
           procedure init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword);
@@ -57,7 +57,10 @@ unit cpupara;
 
 
     function tcpuparamanager.get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;
     function tcpuparamanager.get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;
       begin
       begin
-        result:=VOLATILE_INTREGISTERS;
+        if CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype] then
+          result:=VOLATILE_INTREGISTERS-[RS_R18,RS_R19]
+        else
+          result:=VOLATILE_INTREGISTERS;
       end;
       end;
 
 
 
 
@@ -167,7 +170,7 @@ unit cpupara;
             result:=not(def.size in [1,2,4]);
             result:=not(def.size in [1,2,4]);
           }
           }
           else
           else
-            if (def.size > 8) then
+            if (def.size > 8) or ((CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype]) and (def.size > 4)) then
               result:=true
               result:=true
             else
             else
               result:=inherited ret_in_param(def,pd);
               result:=inherited ret_in_param(def,pd);
@@ -204,7 +207,8 @@ unit cpupara;
         begin
         begin
           { In case of po_delphi_nested_cc, the parent frame pointer
           { In case of po_delphi_nested_cc, the parent frame pointer
             is always passed on the stack. }
             is always passed on the stack. }
-           if (nextintreg>RS_R9) and
+           if (((nextintreg>RS_R9) and not(CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype])) or
+               (nextintreg>RS_R21)) and
               (not(vo_is_parentfp in hp.varoptions) or
               (not(vo_is_parentfp in hp.varoptions) or
                not(po_delphi_nested_cc in p.procoptions)) then
                not(po_delphi_nested_cc in p.procoptions)) then
              begin
              begin
@@ -220,7 +224,10 @@ unit cpupara;
                paraloc^.loc:=LOC_REFERENCE;
                paraloc^.loc:=LOC_REFERENCE;
                paraloc^.reference.index:=NR_STACK_POINTER_REG;
                paraloc^.reference.index:=NR_STACK_POINTER_REG;
                paraloc^.reference.offset:=stack_offset;
                paraloc^.reference.offset:=stack_offset;
+{$push}
+{$R-}
                dec(stack_offset,2);
                dec(stack_offset,2);
+{$pop}
             end;
             end;
         end;
         end;
 
 
@@ -300,7 +307,8 @@ unit cpupara;
                    by adding paralen mod 2, make the size even
                    by adding paralen mod 2, make the size even
                  }
                  }
                  nextintreg:=curintreg-(paralen+(paralen mod 2))+1;
                  nextintreg:=curintreg-(paralen+(paralen mod 2))+1;
-                 if nextintreg>=RS_R8 then
+                 if ((nextintreg>=RS_R8) and not(CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype])) or
+                   (nextintreg>=RS_R20) then
                    curintreg:=nextintreg-1
                    curintreg:=nextintreg-1
                  else
                  else
                    begin
                    begin
@@ -335,7 +343,8 @@ unit cpupara;
                  case loc of
                  case loc of
                     LOC_REGISTER:
                     LOC_REGISTER:
                       begin
                       begin
-                        if nextintreg>=RS_R8 then
+                        if ((nextintreg>=RS_R8) and not(CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype])) or
+                          (nextintreg>=RS_R20) then
                           begin
                           begin
                             paraloc^.loc:=LOC_REGISTER;
                             paraloc^.loc:=LOC_REGISTER;
                             paraloc^.size:=OS_8;
                             paraloc^.size:=OS_8;
@@ -352,19 +361,19 @@ unit cpupara;
                       begin
                       begin
                         if push_addr_param(hp.varspez,paradef,p.proccalloption) then
                         if push_addr_param(hp.varspez,paradef,p.proccalloption) then
                           begin
                           begin
-                            paraloc^.size:=OS_ADDR;
-                            paraloc^.def:=cpointerdef.getreusable_no_free(paradef);
-                            assignintreg
-                          end
+                           paraloc^.size:=OS_ADDR;
+                           paraloc^.def:=cpointerdef.getreusable_no_free(paradef);
+                           assignintreg;
+                         end
                         else
                         else
                           begin
                           begin
-                             paraloc^.def:=hp.vardef;
-                             paraloc^.loc:=LOC_REFERENCE;
-                             paraloc^.reference.index:=NR_STACK_POINTER_REG;
-                             paraloc^.reference.offset:=stack_offset;
-                             inc(stack_offset,hp.vardef.size);
-                          end;
-                        dec(paralen,hp.vardef.size);
+                            paraloc^.def:=hp.vardef;
+                            paraloc^.loc:=LOC_REFERENCE;
+                            paraloc^.reference.index:=NR_STACK_POINTER_REG;
+                            paraloc^.reference.offset:=stack_offset;
+                            inc(stack_offset,paralen);
+                         end;
+                        paralen:=0;
                       end;
                       end;
                     else
                     else
                       internalerror(2002071002);
                       internalerror(2002071002);
@@ -526,17 +535,25 @@ unit cpupara;
       end;
       end;
 
 
 
 
-    function tcpuparamanager.create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;
+    function tcpuparamanager.create_varargs_paraloc_info(p : tabstractprocdef; side: tcallercallee; varargspara:tvarargsparalist):longint;
       var
       var
         cur_stack_offset: aword;
         cur_stack_offset: aword;
         curintreg, curfloatreg, curmmreg: tsuperregister;
         curintreg, curfloatreg, curmmreg: tsuperregister;
       begin
       begin
         init_values(curintreg,curfloatreg,curmmreg,cur_stack_offset);
         init_values(curintreg,curfloatreg,curmmreg,cur_stack_offset);
 
 
-        result:=create_paraloc_info_intern(p,callerside,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset);
+        result:=create_paraloc_info_intern(p,side,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset);
         if (p.proccalloption in cstylearrayofconst) then
         if (p.proccalloption in cstylearrayofconst) then
-          { just continue loading the parameters in the registers }
-          result:=create_paraloc_info_intern(p,callerside,varargspara,curintreg,curfloatreg,curmmreg,cur_stack_offset)
+          begin
+            { just continue loading the parameters in the registers }
+            if assigned(varargspara) then
+              begin
+                if side=callerside then
+                  result:=create_paraloc_info_intern(p,side,varargspara,curintreg,curfloatreg,curmmreg,cur_stack_offset)
+                else
+                  internalerror(2019021914);
+              end;
+          end
         else
         else
           internalerror(200410231);
           internalerror(200410231);
       end;
       end;

+ 3 - 2
compiler/avr/cpupi.pas

@@ -44,7 +44,7 @@ unit cpupi;
   implementation
   implementation
 
 
     uses
     uses
-       globals,systems,
+       globals,systems,verbose,
        cpubase,
        cpubase,
        aasmtai,aasmdata,
        aasmtai,aasmdata,
        tgobj,
        tgobj,
@@ -80,7 +80,8 @@ unit cpupi;
       begin
       begin
         { because of the limited branch distance of cond. branches, they must be replaced
         { because of the limited branch distance of cond. branches, they must be replaced
           sometimes by normal jmps and an inverse branch }
           sometimes by normal jmps and an inverse branch }
-        finalizeavrcode(aktproccode);
+        if not(finalizeavrcode(aktproccode)) then
+          message1(cg_w_cannot_compile_subroutine,procdef.fullprocname(false));
       end;
       end;
 
 
 begin
 begin

+ 2 - 3
compiler/avr/hlcgcpu.pas

@@ -38,8 +38,6 @@ interface
       procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
       procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
     end;
     end;
 
 
-  procedure create_hlcodegen;
-
 implementation
 implementation
 
 
   uses
   uses
@@ -52,7 +50,7 @@ implementation
     end;
     end;
 
 
 
 
-  procedure create_hlcodegen;
+  procedure create_hlcodegen_cpu;
     begin
     begin
       hlcg:=thlcgcpu.create;
       hlcg:=thlcgcpu.create;
       create_codegen;
       create_codegen;
@@ -60,4 +58,5 @@ implementation
 
 
 begin
 begin
   chlcgobj:=thlcgcpu;
   chlcgobj:=thlcgcpu;
+  create_hlcodegen:=@create_hlcodegen_cpu;
 end.
 end.

+ 37 - 9
compiler/avr/navradd.pas

@@ -137,8 +137,8 @@ interface
           current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,tmpreg1,tmpreg2));
           current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,tmpreg1,tmpreg2));
           for i:=2 to tcgsize2size[left.location.size] do
           for i:=2 to tcgsize2size[left.location.size] do
             begin
             begin
-              tmpreg1:=GetNextReg(tmpreg1);
-              tmpreg2:=GetNextReg(tmpreg2);
+              tmpreg1:=cg.GetNextReg(tmpreg1);
+              tmpreg2:=cg.GetNextReg(tmpreg2);
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,tmpreg1,tmpreg2));
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,tmpreg1,tmpreg2));
             end;
             end;
         end;
         end;
@@ -202,17 +202,45 @@ interface
               hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
               hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
           end;
           end;
 
 
+        if (not unsigned) and
+          (right.location.loc=LOC_CONSTANT) and
+          (right.location.value=0) and
+          (getresflags(unsigned) in [F_LT,F_GE]) then
+          begin
+            { This is a simple sign test, where we can just test the msb }
+            tmpreg1:=left.location.register;
+            for i:=2 to tcgsize2size[left.location.size] do
+              begin
+                if i=5 then
+                  tmpreg1:=left.location.registerhi
+                else
+                  tmpreg1:=cg.GetNextReg(tmpreg1);
+              end;
+
+            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,tmpreg1,GetDefaultZeroReg));
+
+            location_reset(location,LOC_FLAGS,OS_NO);
+            location.resflags:=getresflags(unsigned);
+
+            exit;
+          end;
+
         if right.location.loc=LOC_CONSTANT then
         if right.location.loc=LOC_CONSTANT then
           begin
           begin
             { decrease register pressure on registers >= r16 }
             { decrease register pressure on registers >= r16 }
             if (right.location.value and $ff)=0 then
             if (right.location.value and $ff)=0 then
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,left.location.register,NR_R1))
+              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,left.location.register,GetDefaultZeroReg))
             else
             else
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CPI,left.location.register,right.location.value and $ff))
+              begin
+                cg.getcpuregister(current_asmdata.CurrAsmList,NR_R26);
+                current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_LDI,NR_R26,right.location.value and $ff));
+                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,left.location.register,NR_R26));
+                cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_R26);
+              end;
           end
           end
         { on the left side, we allow only a constant if it is 0 }
         { on the left side, we allow only a constant if it is 0 }
         else if (left.location.loc=LOC_CONSTANT) and (left.location.value=0) then
         else if (left.location.loc=LOC_CONSTANT) and (left.location.value=0) then
-          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,NR_R1,right.location.register))
+          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,GetDefaultZeroReg,right.location.register))
         else
         else
           current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,left.location.register,right.location.register));
           current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,left.location.register,right.location.register));
 
 
@@ -231,15 +259,15 @@ interface
             else
             else
               begin
               begin
                 if left.location.loc<>LOC_CONSTANT then
                 if left.location.loc<>LOC_CONSTANT then
-                  tmpreg1:=GetNextReg(tmpreg1);
+                  tmpreg1:=cg.GetNextReg(tmpreg1);
                 if right.location.loc<>LOC_CONSTANT then
                 if right.location.loc<>LOC_CONSTANT then
-                  tmpreg2:=GetNextReg(tmpreg2);
+                  tmpreg2:=cg.GetNextReg(tmpreg2);
               end;
               end;
             if right.location.loc=LOC_CONSTANT then
             if right.location.loc=LOC_CONSTANT then
               begin
               begin
                 { just use R1? }
                 { just use R1? }
                 if ((right.location.value64 shr ((i-1)*8)) and $ff)=0 then
                 if ((right.location.value64 shr ((i-1)*8)) and $ff)=0 then
-                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,tmpreg1,NR_R1))
+                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,tmpreg1,GetDefaultZeroReg))
                 else
                 else
                   begin
                   begin
                     tmpreg2:=cg.getintregister(current_asmdata.CurrAsmList,OS_8);
                     tmpreg2:=cg.getintregister(current_asmdata.CurrAsmList,OS_8);
@@ -249,7 +277,7 @@ interface
               end
               end
             { above it is checked, if left=0, then a constant is allowed }
             { above it is checked, if left=0, then a constant is allowed }
             else if (left.location.loc=LOC_CONSTANT) and (left.location.value=0) then
             else if (left.location.loc=LOC_CONSTANT) and (left.location.value=0) then
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,NR_R1,tmpreg2))
+              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,GetDefaultZeroReg,tmpreg2))
             else
             else
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,tmpreg1,tmpreg2));
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,tmpreg1,tmpreg2));
           end;
           end;

Some files were not shown because too many files changed in this diff