Browse Source

* synchronize with trunk

git-svn-id: branches/z80@44397 -
nickysn 5 years ago
parent
commit
755fe97c51
100 changed files with 10277 additions and 3187 deletions
  1. 614 138
      .gitattributes
  2. 21 21
      .gitignore
  3. 181 202
      Makefile
  4. 30 46
      Makefile.fpc
  5. 320 152
      compiler/Makefile
  6. 307 70
      compiler/Makefile.fpc
  7. 2 1
      compiler/aarch64/a64att.inc
  8. 1 0
      compiler/aarch64/a64atts.inc
  9. 2 0
      compiler/aarch64/a64ins.dat
  10. 2 1
      compiler/aarch64/a64op.inc
  11. 67 2
      compiler/aarch64/a64reg.dat
  12. 106 10
      compiler/aarch64/aasmcpu.pas
  13. 15 4
      compiler/aarch64/agcpugas.pas
  14. 513 12
      compiler/aarch64/aoptcpu.pas
  15. 2 4
      compiler/aarch64/aoptcpub.pas
  16. 163 53
      compiler/aarch64/cgcpu.pas
  17. 76 24
      compiler/aarch64/cpubase.pas
  18. 8 4
      compiler/aarch64/cpuinfo.pas
  19. 7 4
      compiler/aarch64/cpunode.pas
  20. 125 85
      compiler/aarch64/cpupara.pas
  21. 3 0
      compiler/aarch64/cputarg.pas
  22. 11 7
      compiler/aarch64/hlcgcpu.pas
  23. 9 0
      compiler/aarch64/ncpuadd.pas
  24. 0 1
      compiler/aarch64/ncpucnv.pas
  25. 90 0
      compiler/aarch64/ncpucon.pas
  26. 94 1
      compiler/aarch64/ncpuinl.pas
  27. 75 9
      compiler/aarch64/ncpumat.pas
  28. 1 1
      compiler/aarch64/ncpumem.pas
  29. 121 6
      compiler/aarch64/ncpuset.pas
  30. 64 0
      compiler/aarch64/ra64con.inc
  31. 64 0
      compiler/aarch64/ra64dwa.inc
  32. 1 1
      compiler/aarch64/ra64nor.inc
  33. 64 0
      compiler/aarch64/ra64num.inc
  34. 201 137
      compiler/aarch64/ra64rni.inc
  35. 200 136
      compiler/aarch64/ra64sri.inc
  36. 64 0
      compiler/aarch64/ra64sta.inc
  37. 64 0
      compiler/aarch64/ra64std.inc
  38. 64 0
      compiler/aarch64/ra64sup.inc
  39. 3 0
      compiler/aarch64/racpu.pas
  40. 15 12
      compiler/aarch64/racpugas.pas
  41. 5 1
      compiler/aarch64/rgcpu.pas
  42. 16 113
      compiler/aasmbase.pas
  43. 198 0
      compiler/aasmcfi.pas
  44. 199 40
      compiler/aasmcnst.pas
  45. 112 4
      compiler/aasmdata.pas
  46. 1 1
      compiler/aasmdef.pas
  47. 4 0
      compiler/aasmsym.pas
  48. 375 62
      compiler/aasmtai.pas
  49. 322 89
      compiler/aggas.pas
  50. 22 5
      compiler/aopt.pas
  51. 49 25
      compiler/aoptbase.pas
  52. 2 2
      compiler/aoptda.pas
  53. 1198 140
      compiler/aoptobj.pas
  54. 52 6
      compiler/aoptutils.pas
  55. 248 89
      compiler/arm/aasmcpu.pas
  56. 53 15
      compiler/arm/agarmgas.pas
  57. 233 41
      compiler/arm/aoptcpu.pas
  58. 19 6
      compiler/arm/aoptcpub.pas
  59. 1 0
      compiler/arm/armatt.inc
  60. 1 0
      compiler/arm/armatts.inc
  61. 15 1
      compiler/arm/armins.dat
  62. 1 1
      compiler/arm/armnop.inc
  63. 1 0
      compiler/arm/armop.inc
  64. 70 0
      compiler/arm/armtab.inc
  65. 261 136
      compiler/arm/cgcpu.pas
  66. 76 20
      compiler/arm/cpubase.pas
  67. 23 5
      compiler/arm/cpuelf.pas
  68. 84 9
      compiler/arm/cpuinfo.pas
  69. 9 3
      compiler/arm/cpunode.pas
  70. 214 63
      compiler/arm/cpupara.pas
  71. 16 7
      compiler/arm/cpupi.pas
  72. 6 7
      compiler/arm/hlcgcpu.pas
  73. 24 22
      compiler/arm/narmadd.pas
  74. 1 1
      compiler/arm/narmcal.pas
  75. 10 14
      compiler/arm/narmcnv.pas
  76. 93 64
      compiler/arm/narmcon.pas
  77. 51 60
      compiler/arm/narminl.pas
  78. 172 0
      compiler/arm/narmld.pas
  79. 40 30
      compiler/arm/narmmat.pas
  80. 40 36
      compiler/arm/narmset.pas
  81. 327 0
      compiler/arm/narmutil.pas
  82. 17 12
      compiler/arm/raarmgas.pas
  83. 14 6
      compiler/arm/rgcpu.pas
  84. 2 2
      compiler/arm/symcpu.pas
  85. 121 0
      compiler/armgen/armpara.pas
  86. 279 18
      compiler/assemble.pas
  87. 115 24
      compiler/avr/aasmcpu.pas
  88. 25 16
      compiler/avr/agavrgas.pas
  89. 281 152
      compiler/avr/aoptcpu.pas
  90. 0 4
      compiler/avr/aoptcpub.pas
  91. 39 35
      compiler/avr/avrreg.dat
  92. 22 0
      compiler/avr/ccpuinnr.inc
  93. 374 281
      compiler/avr/cgcpu.pas
  94. 48 53
      compiler/avr/cpubase.pas
  95. 445 287
      compiler/avr/cpuinfo.pas
  96. 1 0
      compiler/avr/cpunode.pas
  97. 38 21
      compiler/avr/cpupara.pas
  98. 3 2
      compiler/avr/cpupi.pas
  99. 2 3
      compiler/avr/hlcgcpu.pas
  100. 37 9
      compiler/avr/navradd.pas

File diff suppressed because it is too large
+ 614 - 138
.gitattributes


+ 21 - 21
.gitignore

@@ -238,27 +238,6 @@ compiler/x86_64/lazbuild/fpcmade.*
 compiler/x86_64/lazbuild/units
 compiler/x86_64/units
 /fpcmade.*
-ide/*.bak
-ide/*.exe
-ide/*.o
-ide/*.ppu
-ide/*.s
-ide/compiler/*.bak
-ide/compiler/*.exe
-ide/compiler/*.o
-ide/compiler/*.ppu
-ide/compiler/*.s
-ide/compiler/fpcmade.*
-ide/compiler/units
-ide/fakegdb/*.bak
-ide/fakegdb/*.exe
-ide/fakegdb/*.o
-ide/fakegdb/*.ppu
-ide/fakegdb/*.s
-ide/fakegdb/fpcmade.*
-ide/fakegdb/units
-ide/fpcmade.*
-ide/units
 installer/*.bak
 installer/*.exe
 installer/*.o
@@ -2689,6 +2668,27 @@ packages/iconvenc/src/build-stamp.*
 packages/iconvenc/src/fpcmade.*
 packages/iconvenc/src/units
 packages/iconvenc/units
+packages/ide/*.bak
+packages/ide/*.exe
+packages/ide/*.o
+packages/ide/*.ppu
+packages/ide/*.s
+packages/ide/compiler/*.bak
+packages/ide/compiler/*.exe
+packages/ide/compiler/*.o
+packages/ide/compiler/*.ppu
+packages/ide/compiler/*.s
+packages/ide/compiler/fpcmade.*
+packages/ide/compiler/units
+packages/ide/fakegdb/*.bak
+packages/ide/fakegdb/*.exe
+packages/ide/fakegdb/*.o
+packages/ide/fakegdb/*.ppu
+packages/ide/fakegdb/*.s
+packages/ide/fakegdb/fpcmade.*
+packages/ide/fakegdb/units
+packages/ide/fpcmade.*
+packages/ide/units
 packages/imagemagick/*.bak
 packages/imagemagick/*.exe
 packages/imagemagick/*.o

File diff suppressed because it is too large
+ 181 - 202
Makefile


+ 30 - 46
Makefile.fpc

@@ -4,10 +4,10 @@
 
 [package]
 name=fpc
-version=3.1.1
+version=3.3.1
 
 [target]
-dirs=compiler rtl utils packages ide installer
+dirs=compiler rtl utils packages installer
 
 [require]
 nortl=y
@@ -20,7 +20,7 @@ fpcdir=.
 rule=help
 
 [prerules]
-REQUIREDVERSION=3.0.0
+REQUIREDVERSION=3.0.4
 REQUIREDVERSION2=3.0.2
 
 
@@ -49,6 +49,9 @@ endif
 ifeq ($(CPU_TARGET),sparc)
 PPSUF=sparc
 endif
+ifeq ($(CPU_TARGET),sparc64)
+PPSUF=sparc64
+endif
 ifeq ($(CPU_TARGET),powerpc)
 PPSUF=ppc
 endif
@@ -82,6 +85,15 @@ endif
 ifeq ($(CPU_TARGET),aarch64)
 PPSUF=a64
 endif
+ifeq ($(CPU_TARGET),riscv32)
+PPSUF=rv32
+endif
+ifeq ($(CPU_TARGET),riscv64)
+PPSUF=rv64
+endif
+ifeq ($(CPU_TARGET),xtensa)
+PPSUF=xtensa
+endif
 
 # cross compilers uses full cpu_target, not just ppc-suffix
 # (except if the target cannot run a native compiler)
@@ -188,42 +200,24 @@ endif
 ifneq ($(OPT),)
 OPTNEW+=$(OPT)
 endif
-CLEANOPTS=FPC=$(PPNEW)
-BUILDOPTS=FPC=$(PPNEW) FPCFPMAKE=$(FPCFPMAKENEW) RELEASE=1 'OPT=$(OPTNEW)'
-INSTALLOPTS=FPC=$(PPNEW) ZIPDESTDIR=$(BASEDIR) FPCMAKE=$(FPCMAKENEW)
 
-# This list should be the same as in fpcbuild/Makefile.fpc and in ide/Makefile.fpc
-GDBMI_DEFAULT_OS_LIST=aix freebsd haiku linux netbsd openbsd solaris win32 win64
-#  Determine if we should use GDBMI for Text Mode IDE compilation
-ifndef NOGDBMI
-ifneq ($(findstring $(OS_TARGET),$(GDBMI_DEFAULT_OS_LIST)),)
-export GDBMI=1
+# some targets do not generate PIC by default so we have select explicitly 
+# the general threading model when compiling the final versions of rtl and packages
+ifneq ($(findstring $(OS_TARGET),linux),)
+ifneq ($(findstring $(CPU_TARGET),i386 arm),)
+override OPTNEW+=-CVglobal-dynamic
 endif
 endif
 
-# Compile also IDE now enabled even for cross-compilation
-# if GDBMI is set
-ifdef GDBMI
-ifneq ($(GDBMI),0)
-IDE=1
-endif
-else # not GDBMI
-# Skipped by default for cross compiles, because it depends on libc
-ifndef CROSSCOMPILE
-ifneq ($(wildcard ide),)
-IDETARGETS=go32v2 win32 win64 linux freebsd os2 emx beos haiku
-ifneq ($(findstring $(OS_TARGET),$(IDETARGETS)),)
-IDE=1
-endif
-endif
-endif
-endif # not GDBMI
+CLEANOPTS=FPC=$(PPNEW)
+BUILDOPTS=FPC=$(PPNEW) FPCFPMAKE=$(FPCFPMAKENEW) RELEASE=1 'OPT=$(OPTNEW)' 'FPCMAKEOPT=$(OPT)'
+INSTALLOPTS=FPC=$(PPNEW) ZIPDESTDIR=$(BASEDIR) FPCMAKE=$(FPCMAKENEW)
 
 # CPU targets for which we only build the compiler/rtl
 BuildOnlyBaseCPUs=jvm
 
 ifneq ($(wildcard utils),)
-NOUTILSTARGETS=embedded gba nds msdos win16 $(BuildOnlyBaseCPUs)
+NOUTILSTARGETS=embedded gba nds msdos win16 macos $(BuildOnlyBaseCPUs)
 ifeq ($(findstring $(OS_TARGET),$(NOUTILSTARGETS)),)
 ifdef BUILDFULLNATIVE
 UTILS=1
@@ -325,19 +319,11 @@ endif
 ifdef UTILS
         $(MAKE) utils_clean $(CLEANOPTS)
 endif
-ifdef IDE
-        $(MAKE) ide_clean $(CLEANOPTS)
-        $(MAKE) installer_clean $(CLEANOPTS)
-endif
 # build everything
         $(MAKE) rtl_$(ALLTARGET) $(BUILDOPTS)
         $(MAKE) packages_$(ALLTARGET) $(BUILDOPTS)
 ifdef UTILS
         $(MAKE) utils_all $(BUILDOPTS)
-endif
-ifdef IDE
-        $(MAKE) ide_all $(BUILDOPTS)
-        $(MAKE) installer_all $(BUILDOPTS)
 endif
         $(ECHOREDIR) Build > $(BUILDSTAMP)
         $(ECHOREDIR) Build > base.$(BUILDSTAMP)
@@ -370,9 +356,6 @@ installother:
 ifdef UTILS
         $(MAKE) utils_$(INSTALLTARGET) $(INSTALLOPTS)
 endif
-ifdef IDE
-        $(MAKE) ide_$(INSTALLTARGET) $(BUILDOPTS)
-endif
 
 zipinstallbase:
         $(MAKE) fpc_zipinstall ZIPTARGET=installbase ZIPNAME=base $(INSTALLOPTS)
@@ -382,9 +365,6 @@ zipinstallother:
 ifdef UTILS
         $(MAKE) utils_zip$(INSTALLTARGET) $(INSTALLOPTS)
 endif
-ifdef IDE
-        $(MAKE) ide_zip$(INSTALLTARGET) $(INSTALLOPTS)
-endif
 
 
 installall: $(BUILDSTAMP)
@@ -395,7 +375,11 @@ endif
 
 singlezipinstall: zipinstallall
 zipinstallall: $(BUILDSTAMP)
-        $(MAKE) fpc_zipinstall ZIPTARGET=install FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(TARGETSUFFIX) $(INSTALLOPTS)
+ifeq ($(FULL_SOURCE),$(FULL_TARGET))
+	$(MAKE) fpc_zipinstall ZIPTARGET=install FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(TARGETSUFFIX) $(INSTALLOPTS)
+else
+	$(MAKE) fpc_zipinstall ZIPTARGET=install FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(TARGETSUFFIX).built.on.$(SOURCESUFFIX) $(INSTALLOPTS)
+endif
 
 
 ##########################################################################
@@ -414,4 +398,4 @@ crosszipinstall:
         $(MAKE) zipinstall CROSSINSTALL=1
 
 crosssinglezipinstall:
-        $(MAKE) fpc_zipinstall ZIPTARGET=crossinstall ZIPNAME=fpc $(INSTALLOPTS)
+	$(MAKE) fpc_zipinstall ZIPTARGET=crossinstall FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(SOURCESUFFIX).cross.$(TARGETSUFFIX) $(INSTALLOPTS)

File diff suppressed because it is too large
+ 320 - 152
compiler/Makefile


+ 307 - 70
compiler/Makefile.fpc

@@ -4,14 +4,14 @@
 
 [package]
 name=compiler
-version=3.1.1
+version=3.3.1
 
 [target]
 programs=pp
 dirs=utils
 
 [compiler]
-targetdir=.
+targetdir=$(CPU_UNITDIR)/bin/$(FULL_TARGET)
 unittargetdir=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 unitdir=$(COMPILERSOURCEDIR)
 includedir=$(CPC_TARGET)
@@ -32,7 +32,7 @@ fpcdir=..
 unexport FPC_VERSION FPC_COMPILERINFO
 
 # Which platforms are ready for inclusion in the cycle
-CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086 aarch64
+CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086 aarch64 sparc64 riscv32 riscv64 xtensa
 
 # All supported targets used for clean
 ALLTARGETS=$(CYCLETARGETS)
@@ -47,6 +47,9 @@ endif
 ifdef SPARC
 PPC_TARGET=sparc
 endif
+ifdef SPARC64
+PPC_TARGET=sparc64
+endif
 ifdef M68K
 PPC_TARGET=m68k
 endif
@@ -80,6 +83,15 @@ endif
 ifdef AARCH64
 PPC_TARGET=aarch64
 endif
+ifdef RISCV32
+PPC_TARGET=riscv32
+endif
+ifdef RISCV64
+PPC_TARGET=riscv64
+endif
+ifdef XTENSA
+PPC_TARGET=xtensa
+endif
 
 # Default is to generate a compiler for the same
 # platform as CPU_TARGET (a native compiler)
@@ -171,7 +183,6 @@ CPUSUF=386
 endif
 ifeq ($(CPC_TARGET),m68k)
 CPUSUF=68k
-ALLOW_WARNINGS=1
 endif
 ifeq ($(CPC_TARGET),powerpc)
 CPUSUF=ppc
@@ -182,6 +193,9 @@ endif
 ifeq ($(CPC_TARGET),sparc)
 CPUSUF=sparc
 endif
+ifeq ($(CPC_TARGET),sparc64)
+CPUSUF=sparc64
+endif
 ifeq ($(CPC_TARGET),x86_64)
 CPUSUF=x64
 endif
@@ -207,6 +221,15 @@ endif
 ifeq ($(CPC_TARGET),aarch64)
 CPUSUF=a64
 endif
+ifeq ($(CPC_TARGET),riscv32)
+CPUSUF=rv32
+endif
+ifeq ($(CPC_TARGET),riscv64)
+CPUSUF=rv64
+endif
+ifeq ($(CPC_TARGET),xtensa)
+CPUSUF=xtensa
+endif
 
 # Do not define the default -d$(CPU_TARGET) because that
 # will conflict with our -d$(CPC_TARGET)
@@ -217,6 +240,15 @@ MSGFILE=msg/error$(FPCLANG).msg
 
 
 SVNVERSION:=$(firstword $(wildcard $(addsuffix /svnversion$(SRCEXEEXT),$(SEARCHPATH))))
+PPUDUMPPROG:=$(firstword $(strip $(wildcard $(addsuffix /ppudump$(SRCEXEEXT),$(SEARCHPATH)))))
+ifndef PPUDUMP
+ifdef PPUDUMPPROG
+PPUDUMP=$(PPUDUMPPROG)
+else
+PPUDUMP=ppudump
+endif
+endif
+
 # Check if revision.inc is present
 REVINC:=$(wildcard revision.inc)
 ifneq ($(REVINC),)
@@ -243,12 +275,12 @@ override LOCALOPT+=-d$(CPC_TARGET) -dGDB -dBROWSERLOG
 
 #include LLVM define/directory if requested
 ifdef LLVM
-ifeq ($(findstring $(PPC_TARGET),x86_64),)
-$(error The $(PPC_TARGET) architecture is not (yet) support by the FPC/LLVM code generator)
+ifeq ($(findstring $(PPC_TARGET),x86_64 aarch64 arm),)
+$(error The $(PPC_TARGET) architecture is not (yet) supported by the FPC/LLVM code generator)
 endif
 
 ifeq ($(findstring $(OS_TARGET),darwin iphonesim linux),)
-$(error The $(PPC_TARGET) target OS is not (yet) support by the FPC/LLVM code generator)
+$(error The $(PPC_TARGET) target OS is not (yet) supported by the FPC/LLVM code generator)
 endif
 
 override LOCALOPT+=-dllvm -Fullvm
@@ -256,12 +288,12 @@ endif
 
 # i386 specific
 ifeq ($(PPC_TARGET),i386)
-override LOCALOPT+=-Fux86
+override LOCALOPT+=-Fux86 -Fix86
 endif
 
 # x86_64 specific
 ifeq ($(PPC_TARGET),x86_64)
-override LOCALOPT+=-Fux86
+override LOCALOPT+=-Fux86 -Fix86
 endif
 
 # PowerPC specific
@@ -281,12 +313,22 @@ endif
 
 # Sparc specific
 ifeq ($(PPC_TARGET),sparc)
-override LOCALOPT+=
+override LOCALOPT+=-Fusparcgen -Fisparcgen
+endif
+
+# Sparc specific
+ifeq ($(PPC_TARGET),sparc64)
+override LOCALOPT+=-Fusparcgen -Fisparcgen
 endif
 
 # ARM specific
 ifeq ($(PPC_TARGET),arm)
-override LOCALOPT+=
+override LOCALOPT+=-Fuarmgen
+endif
+
+# ARMEB specific
+ifeq ($(PPC_TARGET),armeb)
+override LOCALOPT+=-Fuarmgen
 endif
 
 # mipsel specific
@@ -299,11 +341,26 @@ ifeq ($(PPC_TARGET),jvm)
 override LOCALOPT+=-Fujvm
 endif
 
+# AArch64 specific
+ifeq ($(PPC_TARGET),aarch64)
+override LOCALOPT+=-Fuarmgen
+endif
+
 # i8086 specific
 ifeq ($(PPC_TARGET),i8086)
 override LOCALOPT+=-Fux86
 endif
 
+# RiscV32 specific
+ifeq ($(PPC_TARGET),riscv32)
+override LOCALOPT+=-Furiscv
+endif
+
+# RiscV64 specific
+ifeq ($(PPC_TARGET),riscv64)
+override LOCALOPT+=-Furiscv
+endif
+
 OPTWPOCOLLECT=-OWdevirtcalls,optvmts -FW$(BASEDIR)/pp1.wpo
 OPTWPOPERFORM=-Owdevirtcalls,optvmts -Fw$(BASEDIR)/pp1.wpo
 # symbol liveness WPO requires nm, smart linking and no stripping (the latter
@@ -337,6 +394,16 @@ endif
 ifeq ($(OS_TARGET),win16)
 NoNativeBinaries=1
 endif
+ifeq ($(OS_TARGET),macos)
+NoNativeBinaries=1
+endif
+
+# Allow install for jvm
+ifeq ($(NoNativeBinaries),1)
+override EXEEXT=$(SRCEXEEXT)
+# In those cases, installation in a cross-installation
+CROSSINSTALL=1
+endif
 
 [rules]
 #####################################################################
@@ -355,7 +422,7 @@ endif
 
 # Use -Sew option by default
 # Allow disabling by setting ALLOW_WARNINGS=1
-ifeq ($(findstring 2.4.,$(FPC_VERSION)),)
+ifneq ($(CYCLELEVEL),1)
 ifndef ALLOW_WARNINGS
 override LOCALOPT+=-Sew
 endif
@@ -408,23 +475,139 @@ INSTALLEXEFILE=$(EXENAME)
 endif
 
 #####################################################################
-# CPU targets
+# Rules to run the compiler trough GDB using utils/gppc386.pp code
+# inside specific levels of cycle.
+# Simply compile utils and utils/gppc386
+# And move generated utils/gppc386 to ./g$(TEMPNAME)
 #####################################################################
 
-PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 mips mipsel avr jvm i8086 aarch64
+# Use debugger for all compilations
+ifdef DEBUG_CYCLE
+DEBUG_EXENAME=1
+DEBUG_PPEXENAME=1
+DEBUG_TEMPNAME=1
+DEBUG_PPCROSSNAME=1
+DEBUG_TEMPNAME1=1
+DEBUG_TEMPNAME2=1
+DEBUG_TEMPNAME3=1
+DEBUG_TEMPWPONAME1=1
+DEBUG_TEMPWPONAME2=1
+endif
+
+# Or DEBUG_XXX to only start a specific compiler
+# inside GDB
+ifdef DEBUG_EXENAME
+EXENAMEPREFIX=g
+NEED_G_COMPILERS+=g$(EXENAME)
+endif
+
+ifdef DEBUG_PPEXENAME
+PPEXENAMEPREFIX=g
+NEED_G_COMPILERS+=g$(PPEXENAME)
+endif
+
+ifdef DEBUG_TEMPNAME
+TEMPNAMEPREFIX=g
+NEED_G_COMPILERS+=g$(TEMPNAME)
+endif
+
+ifdef DEBUG_PPCROSSNAME
+PPCROSSNAMEPREFIX=g
+NEED_G_COMPILERS+=g$(PPCROSSNAME)
+endif
+
+ifdef DEBUG_TEMPNAME1
+TEMPNAME1PREFIX=g
+NEED_G_COMPILERS+=g$(TEMPNAME1)
+endif
+
+ifdef DEBUG_TEMPNAME2
+TEMPNAME2PREFIX=g
+NEED_G_COMPILERS+=g$(TEMPNAME2)
+endif
+
+ifdef DEBUG_TEMPNAME3
+TEMPNAME3PREFIX=g
+NEED_G_COMPILERS+=g$(TEMPNAME3)
+endif
+
+ifdef DEBUG_TEMPWPONAME1
+TEMPNAMEWPO1PREFIX=g
+NEED_G_COMPILERS+=g$(TEMPWPONAME1)
+endif
+
+ifdef DEBUG_TEMPWPONAME2
+TEMPWPONAME2PREFIX=g
+NEED_G_COMPILERS+=g$(TEMPWPONAME2)
+endif
+
+ALL_G_COMPILERS="g$(EXENAME) g$(PPEXENAME) g$(TEMPNAME) g$(PPCROSSNAME) g$(TEMPNAME1) g$(TEMPNAME2) g$(TEMPNAME3) g$(TEMPWPONAME1) g$(TEMPWPONAME2)"
+
+#####################################################################
+# To start a given compiler $(PP) with gdb, copy utils/gppc386 as g$(PP).
+# Symbolic link is not working, full copy required.
+# Use a file as time stamp to avoid recompiling utils/gppc386
+# unless needed.
+#####################################################################
+g$(COMPILERTEMPNAME): fpcmade.generate_g_compilers
+	$(COPY) ./utils/gppc386 ./g$(COMPILERTEMPNAME)
+
+fpcmade.generate_g_compilers: utils/gppc386.pp
+	$(MAKE) rtlclean rtl utils
+	$(MAKE) -C utils gppc386$(EXEEXT)
+	$(GECHO) -n "utils/gppc386 generated at " > $@
+	$(GDATE) >> $@
+
+ifdef EXENAMEPREFIX
+	$(MAKE) g$(EXENAME) COMPILERTEMPNAME=$(EXENAME)
+endif
+ifdef PPEXENAMEPREFIX
+	$(MAKE) g$(PPEXENAME) COMPILERTEMPNAME=$(PPEXENAME)
+endif
+ifdef TEMPNAMEPREFIX
+	$(MAKE) g$(TEMPNAME) COMPILERTEMPNAME=$(TEMPNAME)
+endif
+ifdef PPCROSSNAMEPREFIX
+	$(MAKE) g$(PPCROSSNAME) COMPILERTEMPNAME=$(PPCROSSNAME)
+endif
+ifdef TEMPNAME1PREFIX
+	$(MAKE) g$(TEMPNAME1) COMPILERTEMPNAME=$(TEMPNAME1)
+endif
+ifdef TEMPNAME2PREFIX
+	$(MAKE) g$(TEMPNAME2) COMPILERTEMPNAME=$(TEMPNAME2)
+endif
+ifdef TEMPNAME3PREFIX
+	$(MAKE) g$(TEMPNAME3) COMPILERTEMPNAME=$(TEMPNAME3)
+endif
+ifdef TEMPWPONAME1PREFIX
+	$(MAKE) g$(TEMPWPONAME1) COMPILERTEMPNAME=$(TEMPWPONAME1)
+endif
+ifdef TEMPWPONAME2PREFIX
+	$(MAKE) g$(TEMPWPONAME2) COMPILERTEMPNAME=$(TEMPWPONAME2)
+endif
+
+
+#####################################################################
+# cpu targets
+#####################################################################
+
+PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 mips mipsel avr jvm i8086 aarch64 sparc64 riscv32 riscv64 xtensa
+PPC_SUFFIXES=386 68k ppc sparc arm armeb x64 ppc64 mips mipsel avr jvm 8086 a64 sparc64 rv32 rv64 xtensa
 INSTALL_TARGETS=$(addsuffix _exe_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 SYMLINKINSTALL_TARGETS=$(addsuffix _symlink_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 
 .PHONY: $(PPC_TARGETS) $(INSTALL_TARGETS)$(SYMLINKINSTALL_TARGETS)
 
 $(PPC_TARGETS):
-        $(MAKE) PPC_TARGET=$@ CPU_UNITDIR=$@ all
+        $(MAKE) PPC_TARGET=$@ CPU_UNITDIR=$@ compiler
 
 $(INSTALL_TARGETS):
-        $(MAKE) all install PPC_TARGET=$(subst _exe_install,,$@) CPU_UNITDIR=$(subst _exe_install,,$@)
+        $(MAKE) PPC_TARGET=$(subst _exe_install,,$@) CPU_UNITDIR=$(subst _exe_install,,$@) compiler
+		$(MAKE) PPC_TARGET=$(subst _exe_install,,$@) CPU_UNITDIR=$(subst _exe_install,,$@) exeinstall
 
 $(SYMLINKINSTALL_TARGETS):
-        $(MAKE) all installsymlink PPC_TARGET=$(subst _symlink_install,,$@) CPU_UNITDIR=$(subst _symlink_install,,$@)
+        $(MAKE) PPC_TARGET=$(subst _symlink_install,,$@) CPU_UNITDIR=$(subst _symlink_install,,$@) compiler
+		$(MAKE) PPC_TARGET=$(subst _symlink_install,,$@) CPU_UNITDIR=$(subst _symlink_install,,$@) installsymlink
 
 alltargets: $(ALLTARGETS)
 
@@ -433,8 +616,6 @@ alltargets: $(ALLTARGETS)
 # Default makefile
 #####################################################################
 
-.NOTPARALLEL:
-
 .PHONY: all compiler echotime ppuclean execlean clean distclean
 
 all: compiler $(addsuffix _all,$(TARGET_DIRS))
@@ -460,25 +641,26 @@ ENDTIME:=unknown
 endif
 
 echotime:
-        @echo Start $(STARTTIME) now $(ENDTIME)
+	@echo Start $(STARTTIME) now $(ENDTIME)
 
 ppuclean:
-        -$(DEL) *$(OEXT) *$(PPUEXT) *$(RSTEXT) *$(ASMEXT) *$(STATICLIBEXT) *$(SHAREDLIBEXT) *$(PPLEXT)
-        -$(DEL) $(addsuffix /*$(PPUEXT),$(COMPILERSOURCEDIR))
+	-$(DEL) *$(OEXT) *$(PPUEXT) *$(RSTEXT) *$(ASMEXT) *$(STATICLIBEXT) *$(SHAREDLIBEXT) *$(PPLEXT)
+	-$(DEL) $(addsuffix /*$(PPUEXT),$(COMPILERSOURCEDIR))
 
 tempclean:
-        -$(DEL) $(PPCROSSNAME) $(TEMPNAME) $(TEMPNAME1) $(TEMPNAME2) $(TEMPNAME3) $(MSG2INC) pp1.wpo pp2.wpo
+	-$(DEL) $(PPCROSSNAME) $(TEMPNAME) $(TEMPNAME1) $(TEMPNAME2) $(TEMPNAME3) $(MSG2INC) pp1.wpo pp2.wpo
 
 execlean :
-        -$(DEL) ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT)
-        -$(DEL) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) ppca64$(EXEEXT)
-        -$(DEL) $(EXENAME) $(TEMPWPONAME1) $(TEMPWPONAME2)
+	-$(DEL) $(addsuffix $(EXEEXT), $(addprefix ppc, $(PPC_SUFFIXES)))
+	-$(DEL) $(addsuffix $(EXEEXT), $(addprefix ppcross, $(PPC_SUFFIXES)))
+	-$(DEL) $(EXENAME) $(TEMPWPONAME1) $(TEMPWPONAME2) $(ALL_G_COMPILERS)
+	-$(DEL) fpcmade.generate_g_compilers
 
 $(addsuffix _clean,$(ALLTARGETS)):
         -$(DELTREE) $(addprefix $(subst _clean,,$@),/units)
+        -$(DELTREE) $(addprefix $(subst _clean,,$@),/bin)
         -$(DEL) $(addprefix $(subst _clean,,$@)/,*$(OEXT) *$(PPUEXT) *$(RSTEXT) *$(ASMEXT) *$(STATICLIBEXT) *$(SHAREDLIBEXT) *$(PPLEXT))
-        -$(DEL) $(addprefix $(subst _clean,,$@)/,ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT))
-        -$(DEL) $(addprefix $(subst _clean,,$@)/,ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) ppca64$(EXEEXT) $(EXENAME))
+        -$(DEL) $(addprefix $(subst _clean,,$@)/ppc,$(addsuffix $(EXEEXT), $(PPC_SUFFIXES)))
 
 cycleclean: cleanall $(addsuffix _clean,$(CPC_TARGET))
         -$(DEL) $(EXENAME)
@@ -532,10 +714,16 @@ regdatarm : arm/armreg.dat
 	    $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkarmreg.pp
         cd arm && ..$(PATHSEP)utils$(PATHSEP)mkarmreg$(SRCEXEEXT)
 
-regdatsp : sparc/spreg.dat
+regdatsp : sparcgen/spreg.dat
             $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkspreg.pp
-        cd sparc && ..$(PATHSEP)utils$(PATHSEP)mkspreg$(SRCEXEEXT)
+        cd sparcgen && ..$(PATHSEP)utils$(PATHSEP)mkspreg$(SRCEXEEXT)
+		mv -f sparcgen/rsp*.inc sparc
 
+regdatsp64 : sparcgen/spreg.dat
+            $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkspreg.pp
+        cd sparcgen && ..$(PATHSEP)utils$(PATHSEP)mkspreg$(SRCEXEEXT) sparc64
+		mv -f sparcgen/rsp*.inc sparc64
+		
 regdatavr : avr/avrreg.dat
             $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkavrreg.pp
         cd avr && ..$(PATHSEP)utils$(PATHSEP)mkavrreg$(SRCEXEEXT)
@@ -548,6 +736,8 @@ regdatmips : mips/mipsreg.dat
 	    $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkmpsreg.pp
         cd mips && ..$(PATHSEP)utils$(PATHSEP)mkmpsreg$(SRCEXEEXT)
 
+regdat : regdatx86 regdatarm regdatsp regdatavr regdataarch64 regdatmips regdatsp64 regdatz80
+
 regdatz80 : z80/z80reg.dat
             $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkz80reg.pp
         cd z80 && ..$(PATHSEP)utils$(PATHSEP)mkz80reg$(SRCEXEEXT)
@@ -598,7 +788,6 @@ endif
         $(EXECPPAS)
         $(MOVE) $(COMPILER_TARGETDIR)/$(PPEXENAME) $(EXENAME)
 
-
 #####################################################################
 # Cycle targets
 #
@@ -629,22 +818,27 @@ ifeq ($(OS_SOURCE),$(OS_TARGET))
 ifndef NOWPOCYCLE
 ifdef RELEASE
 DOWPOCYCLE=1
+endif
+endif
+
+ifdef DOWPOCYCLE
 # Two WPO cycles in case of RELEASE=1
 wpocycle:
 # don't use cycle_clean, it will delete the compiler utilities again
         $(RM) $(EXENAME)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OPT=$(strip $(LOCALOPT) $(OPTWPOCOLLECT) $(OPTNEW))' compiler
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3PREFIX)$(TEMPNAME3)' 'OPT=$(strip $(LOCALOPT) $(OPTWPOCOLLECT) $(OPTNEW))' compiler
         $(RM) $(EXENAME)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OPT=$(strip $(RTLOPT) $(OPTWPOPERFORM) $(OPTNEW))' rtlclean rtl
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(OPTWPOPERFORM) $(subst pp1.wpo,pp2.wpo,$(OPTWPOCOLLECT)))' $(addsuffix _clean,$(ALLTARGETS)) compiler
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3PREFIX)$(TEMPNAME3)' 'OPT=$(strip $(RTLOPT) $(OPTWPOPERFORM) $(OPTNEW))' rtlclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3PREFIX)$(TEMPNAME3)' 'OPT=$(strip $(RTLOPT) $(OPTWPOPERFORM) $(OPTNEW))' rtl
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3PREFIX)$(TEMPNAME3)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(OPTWPOPERFORM) $(subst pp1.wpo,pp2.wpo,$(OPTWPOCOLLECT)))' $(addsuffix _clean,$(ALLTARGETS)) 
+	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3PREFIX)$(TEMPNAME3)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(OPTWPOPERFORM) $(subst pp1.wpo,pp2.wpo,$(OPTWPOCOLLECT)))' compiler
         $(MOVE) $(EXENAME) $(TEMPWPONAME1)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1)' 'OPT=$(strip $(RTLOPT) $(OPTNEW) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM)))' rtlclean rtl
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM)))' $(addsuffix _clean,$(ALLTARGETS)) compiler
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1PREFIX)$(TEMPWPONAME1)' 'OPT=$(strip $(RTLOPT) $(OPTNEW) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM)))' rtlclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1PREFIX)$(TEMPWPONAME1)' 'OPT=$(strip $(RTLOPT) $(OPTNEW) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM)))' rtl
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1PREFIX)$(TEMPWPONAME1)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM)))' $(addsuffix _clean,$(ALLTARGETS))
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1PREFIX)$(TEMPWPONAME1)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM)))' compiler
         $(COPY) $(EXENAME) $(TEMPWPONAME2)
-endif
-endif
-
-ifndef DOWPOCYCLE
+else
 wpocycle:
 endif
 
@@ -669,8 +863,10 @@ next :
         $(COPY) $(FPC) $(EXENAME)
 else
 next :
-        $(MAKE) rtlclean rtl
-        $(MAKE) cycleclean compiler
+        $(MAKE) rtlclean
+        $(MAKE) rtl
+        $(MAKE) cycleclean
+        $(MAKE) compiler
         $(MAKE) echotime
 endif
 
@@ -680,20 +876,24 @@ $(TEMPNAME1) :
         $(MOVE) $(EXENAME) $(TEMPNAME1)
 
 $(TEMPNAME2) : $(TEMPNAME1)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME1)' 'OLDFPC=' next CYCLELEVEL=2
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME1PREFIX)$(TEMPNAME1)' 'OLDFPC=' next CYCLELEVEL=2
         -$(DEL) $(TEMPNAME2)
         $(MOVE) $(EXENAME) $(TEMPNAME2)
 
 $(TEMPNAME3) : $(TEMPNAME2)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME2)' 'OLDFPC=$(BASEDIR)/$(TEMPNAME1)' next CYCLELEVEL=3
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME2PREFIX)$(TEMPNAME2)' 'OLDFPC=$(BASEDIR)/$(TEMPNAME1)' next CYCLELEVEL=3
         -$(DEL) $(TEMPNAME3)
         $(MOVE) $(EXENAME) $(TEMPNAME3)
 
 cycle:
-        $(MAKE) tempclean $(TEMPNAME3)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OLDFPC=$(BASEDIR)/$(TEMPNAME2)' next CYCLELEVEL=4
+ifdef NEED_G_COMPILERS
+	$(MAKE) fpcmade.generate_g_compilers
+endif
+        $(MAKE) tempclean
+        $(MAKE) $(TEMPNAME3)
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3PREFIX)$(TEMPNAME3)' 'OLDFPC=$(BASEDIR)/$(TEMPNAME2)' next CYCLELEVEL=4
         $(DIFF) $(TEMPNAME3) $(EXENAME)
-        $(MAKE) $(addsuffix _all,$(TARGET_DIRS)) 'FPC=$(BASEDIR)/$(EXENAME)'
+        $(MAKE) $(addsuffix _all,$(TARGET_DIRS)) 'FPC=$(BASEDIR)/$(EXENAMEPREFIX)$(EXENAME)'
         $(MAKE) wpocycle
         $(MAKE) echotime
 
@@ -704,17 +904,26 @@ else
 #
 
 cycle:
+ifdef NEED_G_COMPILERS
+	$(MAKE) fpcmade.generate_g_compilers
+endif
 # ppc (source native)
-        $(MAKE) OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=1
-        $(MAKE) OS_TARGET=$(OS_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=1
+        $(MAKE) OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 rtlclean
+        $(MAKE) OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 rtl
+        $(MAKE) OS_TARGET=$(OS_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 cycleclean
+        $(MAKE) OS_TARGET=$(OS_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 compiler
 # ppcross<ARCH> (source native)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=2
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=2
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 rtlclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 rtl
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 cycleclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 compiler
 # ppc<ARCH> (target native)
 ifndef CROSSINSTALL
-        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' rtlclean rtl CYCLELEVEL=3
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' CYCLELEVEL=3 rtlclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' CYCLELEVEL=3 rtl
 ifndef NoNativeBinaries
-        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' cycleclean compiler CYCLELEVEL=3
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' CYCLELEVEL=3 cycleclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' CYCLELEVEL=3 compiler
 endif
 endif
 
@@ -732,18 +941,27 @@ else
 
 cycle: override FPC=
 cycle:
+ifdef NEED_G_COMPILERS
+	$(MAKE) fpcmade.generate_g_compilers
+endif
 # ppc (source native)
 # Clear detected compiler binary, because it can be existing crosscompiler binary, but we need native compiler here
-        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=1
-        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=1
+        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 rtlclean
+        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 rtl
+        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 cycleclean 
+        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 compiler 
 # ppcross<ARCH> (source native)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=2
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) PPC_TARGET=$(CPU_TARGET) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=2
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 rtlclean 
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 rtl 
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) PPC_TARGET=$(CPU_TARGET) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 cycleclean 
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) PPC_TARGET=$(CPU_TARGET) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 compiler 
 # ppc<ARCH> (target native)
 ifndef CROSSINSTALL
-        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(RTLOPT) $(CROSSOPT)' rtlclean rtl CYCLELEVEL=3
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' 'OPT=$(RTLOPT) $(CROSSOPT)' CYCLELEVEL=3 rtlclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' 'OPT=$(RTLOPT) $(CROSSOPT)' CYCLELEVEL=3 rtl
 ifndef NoNativeBinaries
-        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(strip $(LOCALOPT) $(CROSSOPT))' cycleclean compiler CYCLELEVEL=3
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' 'OPT=$(strip $(LOCALOPT) $(CROSSOPT))' CYCLELEVEL=3 cycleclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' 'OPT=$(strip $(LOCALOPT) $(CROSSOPT))' CYCLELEVEL=3 compiler
 endif
 endif
 
@@ -766,7 +984,8 @@ cvstest:
 #
 # 1. build a compiler using cycle
 # 2. remove all .ppufiles
-# 3. build all supported cross compilers except the
+# 3. clean and recompile rtl if DOWPOCYCLE is set
+# 4. build all supported cross compilers except the
 #    current PPC_TARGET which was already build
 # unless FPC_SUPPORT_X87_TYPES_ON_WIN64 is set,
 # win64 cannot compile i386 or i8086 compiler
@@ -777,19 +996,24 @@ ifeq ($(OS_SOURCE),win64)
   EXCLUDE_80BIT_TARGETS=1
 endif
 
-ifneq ($(findstring $(CPU_SOURCE),aarch64 arm avr jvm m68k mips mipsel powerpc powerpc64 sparc),)
+ifneq ($(findstring $(CPU_SOURCE),aarch64 arm avr jvm m68k mips mipsel powerpc powerpc64 sparc sparc64 riscv32 riscv64 xtensa),)
   EXCLUDE_80BIT_TARGETS=1
 endif
 
 full: fullcycle
 
 fullcycle:
+        $(MAKE) distclean
         $(MAKE) cycle
         $(MAKE) ppuclean
+ifdef DOWPOCYCLE
+        $(MAKE) rtlclean
+        $(MAKE) rtl 'FPC=$(BASEDIR)/$(EXENAMEPREFIX)$(EXENAME)'
+endif
 ifndef EXCLUDE_80BIT_TARGETS
-        $(MAKE) $(filter-out $(PPC_TARGET),$(CYCLETARGETS)) 'FPC=$(BASEDIR)/$(EXENAME)'
+        $(MAKE) $(filter-out $(PPC_TARGET),$(CYCLETARGETS)) 'FPC=$(BASEDIR)/$(EXENAMEPREFIX)$(EXENAME)'
 else
-        $(MAKE) $(filter-out i8086,$(filter-out i386,$(filter-out $(PPC_TARGET),$(CYCLETARGETS)))) 'FPC=$(BASEDIR)/$(EXENAME)'
+        $(MAKE) $(filter-out i8086,$(filter-out i386,$(filter-out $(PPC_TARGET),$(CYCLETARGETS)))) 'FPC=$(BASEDIR)/$(EXENAMEPREFIX)$(EXENAME)'
 endif
 
 #####################################################################
@@ -837,12 +1061,13 @@ endif
 
 fullinstall:
 ifndef EXCLUDE_80BIT_TARGETS
-        $(MAKE) $(addsuffix _exe_install,$(filter-out $(PPC_TARGET),$(CYCLETARGETS)))
+        $(MAKE) $(addsuffix _exe_install,$(filter-out $(PPC_TARGET),$(CYCLETARGETS))) $(addsuffix _all,$(TARGET_DIRS))
 else
-        $(MAKE) $(addsuffix _exe_install,$(filter-out i8086,$(filter-out i386,$(filter-out $(PPC_TARGET),$(CYCLETARGETS)))))
+        $(MAKE) $(addsuffix _exe_install,$(filter-out i8086,$(filter-out i386,$(filter-out $(PPC_TARGET),$(CYCLETARGETS))))) $(addsuffix _all,$(TARGET_DIRS))
 endif
-
-install: quickinstall
+        $(MAKE) $(addsuffix _install,$(TARGET_DIRS))
+        
+auxfilesinstall:
 ifndef CROSSINSTALL
 ifdef UNIXHier
         $(MKDIR) $(INSTALL_BASEDIR)
@@ -851,6 +1076,15 @@ endif
         $(MKDIR) $(MSGINSTALLDIR)
         $(INSTALL) $(MSGFILES) $(MSGINSTALLDIR)
 endif
+	
+
+install: 
+# if no FPC is passed, use that one we assume, we just build
+ifndef FPC
+	$(MAKE) quickinstall auxfilesinstall FPC=$(BASEDIR)/$(INSTALLEXEFILE)
+else
+	$(MAKE) quickinstall auxfilesinstall
+endif
 
 # This also installs a link from bin to the actual executable.
 # The .deb does that later.
@@ -892,7 +1126,7 @@ ALLPPUDIR=$(CPU_TARGET)/units/*
 PPULIST=$(wildcard $(PPUDIR)/*.ppu)
 PPULOGLIST=$(subst .ppu,.log-ppu,$(PPULIST))
 
-RTLPPUDIR=../rtl/units/$(FULL_TARGET)
+RTLPPUDIR=../rtl/units/$(TARGETSUFFIX)
 RTLPPULIST=$(wildcard $(RTLPPUDIR)/*.ppu)
 RTLPPULOGLIST=$(subst .ppu,.log-ppu,$(RTLPPULIST))
 
@@ -905,8 +1139,9 @@ rtlppulogs : $(RTLPPULOGLIST)
 vpath %.ppu $(PPUDIR) $(RTLPPUDIR) $(ALLPPUDIR)
 vpath %.log-ppu $(PPUDIR) $(RTLPPUDIR) $(ALLPPUDIR)
 
-%.log-ppu : %.ppu ./utils/ppudump$(EXEEXT)
-	.$(PATHSEP)utils$(PATHSEP)ppudump -VA -M $< > $@
+# Use installed ppudump
+%.log-ppu : %.ppu
+	$(PPUDUMP) -VA -M $< > $@
 
 
 ./utils/ppudump$(EXEEXT):
@@ -915,6 +1150,8 @@ vpath %.log-ppu $(PPUDIR) $(RTLPPUDIR) $(ALLPPUDIR)
 ppuinfo :
 	echo PPU list is "$(PPULIST)"
 	echo PPULOG list is "$(PPULOGLIST)"
+	echo RTLPPUDIR is "$(RTLPPUDIR)"
+	echo RTLPPU list is "$(RTLPPULIST)"
 
 cleanppulogs :
 	-$(RMPROG) $(PPULOGLIST)

+ 2 - 1
compiler/aarch64/a64att.inc

@@ -187,5 +187,6 @@
 'fcsel',
 'umov',
 'ins',
-'movi'
+'movi',
+'veor'
 );

+ 1 - 0
compiler/aarch64/a64atts.inc

@@ -187,5 +187,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
 attsufNONE
 );

+ 2 - 0
compiler/aarch64/a64ins.dat

@@ -376,3 +376,5 @@
 [INS]
 
 [MOVI]
+
+[VEOR]

+ 2 - 1
compiler/aarch64/a64op.inc

@@ -187,5 +187,6 @@ A_FCMMPE,
 A_FCSEL,
 A_UMOV,
 A_INS,
-A_MOVI
+A_MOVI,
+A_VEOR
 );

+ 67 - 2
compiler/aarch64/a64reg.dat

@@ -76,170 +76,235 @@ XZR,$01,$05,$1F,xzr,31,31
 WSP,$01,$04,$20,wsp,31,31
 SP,$01,$05,$20,sp,31,31
 
-
 ; vfp registers
+; generated by fpc/compiler/utils/gena64vfp.pp to avoid tedious typing
 B0,$04,$01,$00,b0,64,64
 H0,$04,$03,$00,h0,64,64
 S0,$04,$09,$00,s0,64,64
 D0,$04,$0a,$00,d0,64,64
 Q0,$04,$05,$00,q0,64,64
+V08B,$04,$17,$00,v0.8b,64,64
+V016B,$04,$18,$00,v0.16b,64,64
 B1,$04,$01,$01,b1,65,65
 H1,$04,$03,$01,h1,65,65
 S1,$04,$09,$01,s1,65,65
 D1,$04,$0a,$01,d1,65,65
 Q1,$04,$05,$01,q1,65,65
+V18B,$04,$17,$01,v1.8b,65,65
+V116B,$04,$18,$01,v1.16b,65,65
 B2,$04,$01,$02,b2,66,66
 H2,$04,$03,$02,h2,66,66
 S2,$04,$09,$02,s2,66,66
 D2,$04,$0a,$02,d2,66,66
 Q2,$04,$05,$02,q2,66,66
+V28B,$04,$17,$02,v2.8b,66,66
+V216B,$04,$18,$02,v2.16b,66,66
 B3,$04,$01,$03,b3,67,67
 H3,$04,$03,$03,h3,67,67
 S3,$04,$09,$03,s3,67,67
 D3,$04,$0a,$03,d3,67,67
 Q3,$04,$05,$03,q3,67,67
+V38B,$04,$17,$03,v3.8b,67,67
+V316B,$04,$18,$03,v3.16b,67,67
 B4,$04,$01,$04,b4,68,68
 H4,$04,$03,$04,h4,68,68
 S4,$04,$09,$04,s4,68,68
 D4,$04,$0a,$04,d4,68,68
 Q4,$04,$05,$04,q4,68,68
+V48B,$04,$17,$04,v4.8b,68,68
+V416B,$04,$18,$04,v4.16b,68,68
 B5,$04,$01,$05,b5,69,69
 H5,$04,$03,$05,h5,69,69
 S5,$04,$09,$05,s5,69,69
 D5,$04,$0a,$05,d5,69,69
 Q5,$04,$05,$05,q5,69,69
+V58B,$04,$17,$05,v5.8b,69,69
+V516B,$04,$18,$05,v5.16b,69,69
 B6,$04,$01,$06,b6,70,70
 H6,$04,$03,$06,h6,70,70
-S6,$04,$09,$06,s6,70,70
+S6,$04,$09,$06,s6,70,70                                                                     gena64vfp.pp
 D6,$04,$0a,$06,d6,70,70
 Q6,$04,$05,$06,q6,70,70
+V68B,$04,$17,$06,v6.8b,70,70
+V616B,$04,$18,$06,v6.16b,70,70
 B7,$04,$01,$07,b7,71,71
 H7,$04,$03,$07,h7,71,71
 S7,$04,$09,$07,s7,71,71
 D7,$04,$0a,$07,d7,71,71
 Q7,$04,$05,$07,q7,71,71
+V78B,$04,$17,$07,v7.8b,71,71
+V716B,$04,$18,$07,v7.16b,71,71
 B8,$04,$01,$08,b8,72,72
 H8,$04,$03,$08,h8,72,72
 S8,$04,$09,$08,s8,72,72
 D8,$04,$0a,$08,d8,72,72
 Q8,$04,$05,$08,q8,72,72
+V88B,$04,$17,$08,v8.8b,72,72
+V816B,$04,$18,$08,v8.16b,72,72
 B9,$04,$01,$09,b9,73,73
 H9,$04,$03,$09,h9,73,73
 S9,$04,$09,$09,s9,73,73
 D9,$04,$0a,$09,d9,73,73
 Q9,$04,$05,$09,q9,73,73
+V98B,$04,$17,$09,v9.8b,73,73
+V916B,$04,$18,$09,v9.16b,73,73
 B10,$04,$01,$0A,b10,74,74
 H10,$04,$03,$0A,h10,74,74
 S10,$04,$09,$0A,s10,74,74
 D10,$04,$0a,$0A,d10,74,74
 Q10,$04,$05,$0A,q10,74,74
+V108B,$04,$17,$0A,v10.8b,74,74
+V1016B,$04,$18,$0A,v10.16b,74,74
 B11,$04,$01,$0B,b11,75,75
 H11,$04,$03,$0B,h11,75,75
 S11,$04,$09,$0B,s11,75,75
 D11,$04,$0a,$0B,d11,75,75
 Q11,$04,$05,$0B,q11,75,75
+V118B,$04,$17,$0B,v11.8b,75,75
+V1116B,$04,$18,$0B,v11.16b,75,75
 B12,$04,$01,$0C,b12,76,76
 H12,$04,$03,$0C,h12,76,76
 S12,$04,$09,$0C,s12,76,76
 D12,$04,$0a,$0C,d12,76,76
 Q12,$04,$05,$0C,q12,76,76
+V128B,$04,$17,$0C,v12.8b,76,76
+V1216B,$04,$18,$0C,v12.16b,76,76
 B13,$04,$01,$0D,b13,77,77
 H13,$04,$03,$0D,h13,77,77
 S13,$04,$09,$0D,s13,77,77
 D13,$04,$0a,$0D,d13,77,77
 Q13,$04,$05,$0D,q13,77,77
+V138B,$04,$17,$0D,v13.8b,77,77
+V1316B,$04,$18,$0D,v13.16b,77,77
 B14,$04,$01,$0E,b14,78,78
 H14,$04,$03,$0E,h14,78,78
 S14,$04,$09,$0E,s14,78,78
 D14,$04,$0a,$0E,d14,78,78
 Q14,$04,$05,$0E,q14,78,78
+V148B,$04,$17,$0E,v14.8b,78,78
+V1416B,$04,$18,$0E,v14.16b,78,78
 B15,$04,$01,$0F,b15,79,79
 H15,$04,$03,$0F,h15,79,79
 S15,$04,$09,$0F,s15,79,79
 D15,$04,$0a,$0F,d15,79,79
 Q15,$04,$05,$0F,q15,79,79
+V158B,$04,$17,$0F,v15.8b,79,79
+V1516B,$04,$18,$0F,v15.16b,79,79
 B16,$04,$01,$10,b16,80,80
 H16,$04,$03,$10,h16,80,80
 S16,$04,$09,$10,s16,80,80
 D16,$04,$0a,$10,d16,80,80
 Q16,$04,$05,$10,q16,80,80
+V168B,$04,$17,$10,v16.8b,80,80
+V1616B,$04,$18,$10,v16.16b,80,80
 B17,$04,$01,$11,b17,81,81
 H17,$04,$03,$11,h17,81,81
 S17,$04,$09,$11,s17,81,81
 D17,$04,$0a,$11,d17,81,81
 Q17,$04,$05,$11,q17,81,81
+V178B,$04,$17,$11,v17.8b,81,81
+V1716B,$04,$18,$11,v17.16b,81,81
 B18,$04,$01,$12,b18,82,82
 H18,$04,$03,$12,h18,82,82
 S18,$04,$09,$12,s18,82,82
 D18,$04,$0a,$12,d18,82,82
 Q18,$04,$05,$12,q18,82,82
+V188B,$04,$17,$12,v18.8b,82,82
+V1816B,$04,$18,$12,v18.16b,82,82
 B19,$04,$01,$13,b19,83,83
 H19,$04,$03,$13,h19,83,83
 S19,$04,$09,$13,s19,83,83
 D19,$04,$0a,$13,d19,83,83
 Q19,$04,$05,$13,q19,83,83
+V198B,$04,$17,$13,v19.8b,83,83
+V1916B,$04,$18,$13,v19.16b,83,83
 B20,$04,$01,$14,b20,84,84
 H20,$04,$03,$14,h20,84,84
 S20,$04,$09,$14,s20,84,84
 D20,$04,$0a,$14,d20,84,84
 Q20,$04,$05,$14,q20,84,84
+V208B,$04,$17,$14,v20.8b,84,84
+V2016B,$04,$18,$14,v20.16b,84,84
 B21,$04,$01,$15,b21,85,85
 H21,$04,$03,$15,h21,85,85
 S21,$04,$09,$15,s21,85,85
 D21,$04,$0a,$15,d21,85,85
 Q21,$04,$05,$15,q21,85,85
+V218B,$04,$17,$15,v21.8b,85,85
+V2116B,$04,$18,$15,v21.16b,85,85
 B22,$04,$01,$16,b22,86,86
 H22,$04,$03,$16,h22,86,86
 S22,$04,$09,$16,s22,86,86
 D22,$04,$0a,$16,d22,86,86
 Q22,$04,$05,$16,q22,86,86
+V228B,$04,$17,$16,v22.8b,86,86
+V2216B,$04,$18,$16,v22.16b,86,86
 B23,$04,$01,$17,b23,87,87
 H23,$04,$03,$17,h23,87,87
 S23,$04,$09,$17,s23,87,87
 D23,$04,$0a,$17,d23,87,87
 Q23,$04,$05,$17,q23,87,87
+V238B,$04,$17,$17,v23.8b,87,87
+V2316B,$04,$18,$17,v23.16b,87,87
 B24,$04,$01,$18,b24,88,88
 H24,$04,$03,$18,h24,88,88
 S24,$04,$09,$18,s24,88,88
 D24,$04,$0a,$18,d24,88,88
 Q24,$04,$05,$18,q24,88,88
+V248B,$04,$17,$18,v24.8b,88,88
+V2416B,$04,$18,$18,v24.16b,88,88
 B25,$04,$01,$19,b25,89,89
 H25,$04,$03,$19,h25,89,89
 S25,$04,$09,$19,s25,89,89
 D25,$04,$0a,$19,d25,89,89
 Q25,$04,$05,$19,q25,89,89
+V258B,$04,$17,$19,v25.8b,89,89
+V2516B,$04,$18,$19,v25.16b,89,89
 B26,$04,$01,$1A,b26,90,90
 H26,$04,$03,$1A,h26,90,90
 S26,$04,$09,$1A,s26,90,90
 D26,$04,$0a,$1A,d26,90,90
 Q26,$04,$05,$1A,q26,90,90
+V268B,$04,$17,$1A,v26.8b,90,90
+V2616B,$04,$18,$1A,v26.16b,90,90
 B27,$04,$01,$1B,b27,91,91
 H27,$04,$03,$1B,h27,91,91
 S27,$04,$09,$1B,s27,91,91
 D27,$04,$0a,$1B,d27,91,91
 Q27,$04,$05,$1B,q27,91,91
+V278B,$04,$17,$1B,v27.8b,91,91
+V2716B,$04,$18,$1B,v27.16b,91,91
 B28,$04,$01,$1C,b28,92,92
 H28,$04,$03,$1C,h28,92,92
 S28,$04,$09,$1C,s28,92,92
 D28,$04,$0a,$1C,d28,92,92
 Q28,$04,$05,$1C,q28,92,92
+V288B,$04,$17,$1C,v28.8b,92,92
+V2816B,$04,$18,$1C,v28.16b,92,92
 B29,$04,$01,$1D,b29,93,93
 H29,$04,$03,$1D,h29,93,93
 S29,$04,$09,$1D,s29,93,93
 D29,$04,$0a,$1D,d29,93,93
 Q29,$04,$05,$1D,q29,93,93
+V298B,$04,$17,$1D,v29.8b,93,93
+V2916B,$04,$18,$1D,v29.16b,93,93
 B30,$04,$01,$1E,b30,94,94
 H30,$04,$03,$1E,h30,94,94
 S30,$04,$09,$1E,s30,94,94
 D30,$04,$0a,$1E,d30,94,94
 Q30,$04,$05,$1E,q30,94,94
+V308B,$04,$17,$1E,v30.8b,94,94
+V3016B,$04,$18,$1E,v30.16b,94,94
 B31,$04,$01,$1F,b31,95,95
 H31,$04,$03,$1F,h31,95,95
 S31,$04,$09,$1F,s31,95,95
 D31,$04,$0a,$1F,d31,95,95
 Q31,$04,$05,$1F,q31,95,95
+V318B,$04,$17,$1F,v31.8b,95,95
+V3116B,$04,$18,$1F,v31.16b,95,95
 
 NZCV,$05,$00,$00,nzcv,0,0
 FPCR,$05,$00,$01,fpcr,0,0
 FPSR,$05,$00,$02,fpsr,0,0
 TPIDR_EL0,$05,$00,$03,tpidr_el0,0,0
+

+ 106 - 10
compiler/aarch64/aasmcpu.pas

@@ -157,6 +157,8 @@ uses
          oppostfix : TOpPostfix;
          procedure loadshifterop(opidx:longint;const so:tshifterop);
          procedure loadconditioncode(opidx: longint; const c: tasmcond);
+         procedure loadrealconst(opidx: longint; const _value: bestreal);
+
          constructor op_none(op : tasmop);
 
          constructor op_reg(op : tasmop;_op1 : tregister);
@@ -168,6 +170,7 @@ uses
          constructor op_reg_cond(op: tasmop; _op1: tregister; _op2: tasmcond);
          constructor op_reg_const(op:tasmop; _op1: tregister; _op2: aint);
          constructor op_reg_const_shifterop(op : tasmop;_op1: tregister; _op2: aint;_op3 : tshifterop);
+         constructor op_reg_realconst(op: tasmop; _op1: tregister; _op2: bestreal);
 
          constructor op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
          constructor op_reg_reg_reg_reg(op : tasmop;_op1,_op2,_op3,_op4 : tregister);
@@ -180,7 +183,6 @@ uses
          constructor op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister; const _op4 : tshifterop);
          constructor op_reg_reg_reg_cond(op : tasmop;_op1,_op2,_op3 : tregister; const _op4: tasmcond);
 
-
          { this is for Jmp instructions }
          constructor op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
 
@@ -280,6 +282,19 @@ implementation
       end;
 
 
+    procedure taicpu.loadrealconst(opidx:longint;const _value:bestreal);
+      begin
+        allocate_oper(opidx+1);
+        with oper[opidx]^ do
+          begin
+            if typ<>top_realconst then
+              clearop(opidx);
+            val_real:=_value;
+            typ:=top_realconst;
+          end;
+      end;
+
+
 {*****************************************************************************
                                  taicpu Constructors
 *****************************************************************************}
@@ -382,6 +397,15 @@ implementation
       end;
 
 
+    constructor taicpu.op_reg_realconst(op : tasmop; _op1 : tregister; _op2 : bestreal);
+      begin
+         inherited create(op);
+         ops:=2;
+         loadreg(0,_op1);
+         loadrealconst(1,_op2);
+      end;
+
+
      constructor taicpu.op_reg_reg_const(op : tasmop;_op1,_op2 : tregister; _op3: aint);
        begin
          inherited create(op);
@@ -528,7 +552,7 @@ implementation
       const
         { invalid sizes for aarch64 are 0 }
         subreg2bytesize: array[TSubRegister] of byte =
-          (0,0,0,0,4,8,0,0,0,4,8,0,0,0);
+          (0,0,0,0,4,8,0,0,0,4,8,0,0,0,0,0,0,0,0,0,0,0,0,8,16,0);
       var
         scalefactor: byte;
       begin
@@ -554,16 +578,17 @@ implementation
       begin
         result:=sr_complex;
         if not assigned(ref.symboldata) and
-           not(ref.refaddr in [addr_gotpageoffset,addr_gotpage,addr_pageoffset,addr_page]) then
+           not(ref.refaddr in [addr_pic,addr_gotpageoffset,addr_gotpage,addr_pageoffset,addr_page]) then
           exit;
         { can't use pre-/post-indexed mode here (makes no sense either) }
         if ref.addressmode<>AM_OFFSET then
           exit;
         { "ldr literal" must be a 32/64 bit LDR and have a symbol }
-        if assigned(ref.symboldata) and
+        if (ref.refaddr=addr_pic) and
            ((op<>A_LDR) or
             not(oppostfix in [PF_NONE,PF_W,PF_SW]) or
-            not assigned(ref.symbol)) then
+            (not assigned(ref.symbol) and
+             not assigned(ref.symboldata))) then
           exit;
         { if this is a (got) page offset load, we must have a base register and a
           symbol }
@@ -592,7 +617,6 @@ implementation
 
     function simple_ref_type(op: tasmop; size:tcgsize; oppostfix: toppostfix; const ref: treference): tsimplereftype;
       var
-        maxoffs: asizeint;
         accesssize: longint;
       begin
         result:=sr_internal_illegal;
@@ -867,10 +891,13 @@ implementation
     function taicpu.spilling_get_operation_type(opnr: longint): topertype;
       begin
         case opcode of
-          A_B,A_BL,
+          A_B,A_BL,A_BR,A_BLR,
           A_CMN,A_CMP,
           A_CCMN,A_CCMP,
-          A_TST:
+          A_TST,
+          A_FCMP,A_FCMPE,
+          A_CBZ,A_CBNZ,
+          A_RET:
             result:=operand_read;
           A_STR,A_STUR:
             if opnr=0 then
@@ -903,11 +930,78 @@ implementation
                  { check for pre/post indexed in spilling_get_operation_type_ref }
                  result:=operand_read;
              end;
+{$ifdef EXTDEBUG}
+           { play save to avoid hard to find bugs, better fail at compile time }
+           A_ADD,
+           A_ADRP,
+           A_AND,
+           A_ASR,
+           A_BFI,
+           A_BFXIL,
+           A_CLZ,
+           A_CSEL,
+           A_CSET,
+           A_CSETM,
+           A_FABS,
+           A_EON,
+           A_EOR,
+           A_FADD,
+           A_FCVT,
+           A_FDIV,
+           A_FMADD,
+           A_FMOV,
+           A_FMSUB,
+           A_FMUL,
+           A_FNEG,
+           A_FNMADD,
+           A_FNMSUB,
+           A_FRINTX,
+           A_FSQRT,
+           A_FSUB,
+           A_ORR,
+           A_LSL,
+           A_LSLV,
+           A_LSR,
+           A_LSRV,
+           A_MOV,
+           A_MOVK,
+           A_MOVN,
+           A_MOVZ,
+           A_MSUB,
+           A_MUL,
+           A_MVN,
+           A_NEG,
+           A_LDR,
+           A_LDUR,
+           A_RBIT,
+           A_ROR,
+           A_RORV,
+           A_SBFX,
+           A_SCVTF,
+           A_FCVTZS,
+           A_SDIV,
+           A_SMULL,
+           A_SUB,
+           A_SXT,
+           A_UBFIZ,
+           A_UBFX,
+           A_UCVTF,
+           A_UDIV,
+           A_UMULL,
+           A_UXT:
+             if opnr=0 then
+               result:=operand_write
+             else
+               result:=operand_read;
+           else
+             Internalerror(2019090802);
+{$else EXTDEBUG}
            else
              if opnr=0 then
                result:=operand_write
              else
                result:=operand_read;
+{$endif EXTDEBUG}
         end;
       end;
 
@@ -922,8 +1016,8 @@ implementation
 
 
     procedure BuildInsTabCache;
-      var
-        i : longint;
+//      var
+//        i : longint;
       begin
 (*        new(instabcache);
         FillChar(instabcache^,sizeof(tinstabcache),$ff);
@@ -1006,6 +1100,7 @@ implementation
 *)
 
     procedure insertpcrelativedata(list,listtoinsert : TAsmList);
+(*
       var
         curinspos,
         penalty,
@@ -1021,6 +1116,7 @@ implementation
         l : tasmlabel;
         doinsert,
         removeref : boolean;
+*)
       begin
 (*
         curdata:=TAsmList.create;

+ 15 - 4
compiler/aarch64/agcpugas.pas

@@ -50,7 +50,7 @@ unit agcpugas;
 
     const
       gas_shiftmode2str : array[tshiftmode] of string[4] = (
-        '','lsl','lsr','asr',
+        '','lsl','lsr','asr','ror',
         'uxtb','uxth','uxtw','uxtx',
         'sxtb','sxth','sxtw','sxtx');
 
@@ -119,9 +119,13 @@ unit agcpugas;
                     result:=ref.symbol.name+darwin_addrpage2str[ref.refaddr]
                   else
                     result:=linux_addrpage2str[ref.refaddr]+ref.symbol.name
-                end
+                end;
+              addr_pic,
+              { for locals replaced by temp symbols on LLVM }
+              addr_no:
+                result:=ref.symbol.name;
               else
-                internalerror(2015022301);
+                internalerror(2015022302);
             end
           end
         else
@@ -180,6 +184,8 @@ unit agcpugas;
                 result:=result+']';
               AM_PREINDEXED:
                 result:=result+']!';
+              else
+                ;
             end;
           end;
       end;
@@ -234,6 +240,11 @@ unit agcpugas;
               end
             else
               getopstr:=getreferencestring(asminfo,o.ref^);
+          top_realconst:
+            begin
+              str(o.val_real,Result);
+              Result:='#'+Result;
+            end
           else
             internalerror(2014121507);
         end;
@@ -274,7 +285,7 @@ unit agcpugas;
             idtxt  : 'AS';
             asmbin : 'as';
             asmcmd : '-o $OBJ $EXTRAOPT $ASM';
-            supported_targets : [system_aarch64_linux];
+            supported_targets : [system_aarch64_linux,system_aarch64_android];
             flags : [af_needar,af_smartlink_sections];
             labelprefix : '.L';
             comment : '// ';

+ 513 - 12
compiler/aarch64/aoptcpu.pas

@@ -21,26 +21,54 @@
  ****************************************************************************
 }
 
-
 Unit aoptcpu;
 
 {$i fpcdefs.inc}
 
+{ $define DEBUG_AOPTCPU}
+
 Interface
 
-uses cpubase, aasmtai, aopt, aoptcpub;
+    uses
+      globtype, globals,
+      cutils,
+      cgbase, cpubase, aasmtai, aasmcpu, aopt, aoptcpub;
 
-Type
-  TCpuAsmOptimizer = class(TAsmOptimizer)
-    { uses the same constructor as TAopObj }
-    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
-    procedure PeepHoleOptPass2;override;
-  End;
+    Type
+      TCpuAsmOptimizer = class(TAsmOptimizer)
+        { uses the same constructor as TAopObj }
+        function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
+        function PostPeepHoleOptsCpu(var p: tai): boolean; override;
+        function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;override;
+        function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;override;
+        function GetNextInstructionUsingReg(Current : tai; out Next : tai; reg : TRegister) : Boolean;
+        function LookForPostindexedPattern(p : taicpu) : boolean;
+        procedure DebugMsg(const s : string; p : tai);
+      private
+        function OptPass1Shift(var p: tai): boolean;
+        function OptPostCMP(var p: tai): boolean;
+        function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
+        function OptPass1Data(var p: tai): boolean;
+      End;
 
 Implementation
 
   uses
-    aasmbase,aasmcpu,cgbase;
+    aasmbase,
+    aoptutils,
+    cgutils,
+    verbose;
+
+{$ifdef DEBUG_AOPTCPU}
+  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
+    begin
+      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
+    end;
+{$else DEBUG_AOPTCPU}
+  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
+    begin
+    end;
+{$endif DEBUG_AOPTCPU}
 
   function CanBeCond(p : tai) : boolean;
     begin
@@ -48,16 +76,489 @@ Implementation
     end;
 
 
-  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
+  function RefsEqual(const r1, r2: treference): boolean;
+    begin
+      refsequal :=
+        (r1.offset = r2.offset) and
+        (r1.base = r2.base) and
+        (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
+        (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
+        (r1.relsymbol = r2.relsymbol) and
+        (r1.shiftimm = r2.shiftimm) and
+        (r1.addressmode = r2.addressmode) and
+        (r1.shiftmode = r2.shiftmode) and
+        (r1.volatility=[]) and
+        (r2.volatility=[]);
+    end;
+
+
+  function MatchInstruction(const instr: tai; const op: TAsmOps; const postfix: TOpPostfixes): boolean;
+    begin
+      result :=
+        (instr.typ = ait_instruction) and
+        ((op = []) or (taicpu(instr).opcode in op)) and
+        ((postfix = []) or (taicpu(instr).oppostfix in postfix));
+    end;
+
+
+  function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
+    begin
+      result :=
+        (instr.typ = ait_instruction) and
+        (taicpu(instr).opcode = op) and
+        ((postfix = []) or (taicpu(instr).oppostfix in postfix));
+    end;
+
+
+  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
+    begin
+      result := (oper.typ = top_reg) and (oper.reg = reg);
+    end;
+
+
+  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
+    begin
+      result := oper1.typ = oper2.typ;
+
+      if result then
+        case oper1.typ of
+          top_const:
+            Result:=oper1.val = oper2.val;
+          top_reg:
+            Result:=oper1.reg = oper2.reg;
+          top_conditioncode:
+            Result:=oper1.cc = oper2.cc;
+          top_realconst:
+            Result:=oper1.val_real = oper2.val_real;
+          top_ref:
+            Result:=RefsEqual(oper1.ref^, oper2.ref^);
+          else Result:=false;
+        end
+    end;
+
+
+  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
+    Out Next: tai; reg: TRegister): Boolean;
+    begin
+      Next:=Current;
+      repeat
+        Result:=GetNextInstruction(Next,Next);
+      until not (Result) or
+            not(cs_opt_level3 in current_settings.optimizerswitches) or
+            (Next.typ<>ait_instruction) or
+            RegInInstruction(reg,Next) or
+            is_calljmp(taicpu(Next).opcode);
+    end;
+
+
+  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
+    var
+      p: taicpu;
+    begin
+      p := taicpu(hp);
+      Result := false;
+      if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
+        exit;
+
+      case p.opcode of
+        { These operands do not write into a register at all }
+        A_CMP, A_CMN, A_TST, A_B, A_BL, A_MSR, A_FCMP:
+          exit;
+        {Take care of post/preincremented store and loads, they will change their base register}
+        A_STR, A_LDR:
+          begin
+            Result := false;
+            { actually, this does not apply here because post-/preindexed does not mean that a register
+              is loaded with a new value, it is only modified
+              (taicpu(p).oper[1]^.typ=top_ref) and
+              (taicpu(p).oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
+              (taicpu(p).oper[1]^.ref^.base = reg);
+            }
+            { STR does not load into it's first register }
+            if p.opcode = A_STR then
+              exit;
+          end;
+        else
+          ;
+      end;
+
+      if Result then
+        exit;
+
+      case p.oper[0]^.typ of
+        top_reg:
+          Result := (p.oper[0]^.reg = reg);
+        top_ref:
+          Result :=
+            (taicpu(p).oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
+            (taicpu(p).oper[0]^.ref^.base = reg);
+        else
+          ;
+      end;
+    end;
+
+
+  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
+    var
+      p: taicpu;
+      i: longint;
+    begin
+      instructionLoadsFromReg := false;
+      if not (assigned(hp) and (hp.typ = ait_instruction)) then
+        exit;
+      p:=taicpu(hp);
+
+      i:=1;
+
+      { Start on oper[0]? }
+      if taicpu(hp).spilling_get_operation_type(0) in [operand_read, operand_readwrite] then
+        i:=0;
+
+      while(i<p.ops) do
+        begin
+          case p.oper[I]^.typ of
+            top_reg:
+              Result := (p.oper[I]^.reg = reg);
+            top_ref:
+              Result :=
+                (p.oper[I]^.ref^.base = reg) or
+                (p.oper[I]^.ref^.index = reg);
+            else
+              ;
+          end;
+          { Bailout if we found something }
+          if Result then
+            exit;
+          Inc(I);
+        end;
+    end;
+
+
+  function TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
+    var
+      alloc,
+      dealloc : tai_regalloc;
+      hp1 : tai;
+    begin
+      Result:=false;
+      if MatchInstruction(movp, A_MOV, [PF_None]) and
+        (taicpu(p).ops>=3) and
+        { We can't optimize if there is a shiftop }
+        (taicpu(movp).ops=2) and
+        MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
+        { don't mess with moves to fp }
+        (taicpu(movp).oper[0]^.reg<>NR_FP) and
+        { the destination register of the mov might not be used beween p and movp }
+        not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
+        { Take care to only do this for instructions which REALLY load to the first register.
+          Otherwise
+            str reg0, [reg1]
+            mov reg2, reg0
+          will be optimized to
+            str reg2, [reg1]
+        }
+        RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
+        begin
+          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
+          if assigned(dealloc) then
+            begin
+              DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
+              result:=true;
+
+              { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
+                and remove it if possible }
+              asml.Remove(dealloc);
+              alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
+              if assigned(alloc) then
+                begin
+                  asml.Remove(alloc);
+                  alloc.free;
+                  dealloc.free;
+                end
+              else
+                asml.InsertAfter(dealloc,p);
+
+              { try to move the allocation of the target register }
+              GetLastInstruction(movp,hp1);
+              alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
+              if assigned(alloc) then
+                begin
+                  asml.Remove(alloc);
+                  asml.InsertBefore(alloc,p);
+                  { adjust used regs }
+                  IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
+                end;
+
+              { finally get rid of the mov }
+              taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
+              { Remove preindexing and postindexing for LDR in some cases.
+                For example:
+                  ldr	reg2,[reg1, xxx]!
+                  mov reg1,reg2
+                must be translated to:
+                  ldr	reg1,[reg1, xxx]
+
+                Preindexing must be removed there, since the same register is used as the base and as the target.
+                Such case is not allowed for ARM CPU and produces crash. }
+              if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref)
+                and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base)
+              then
+                taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;
+              asml.remove(movp);
+              movp.free;
+            end;
+        end;
+    end;
+
+
+  {
+    optimize
+      ldr/str regX,[reg1]
+      ...
+      add/sub reg1,reg1,regY/const
+
+      into
+
+      ldr/str regX,[reg1], regY/const
+  }
+  function TCpuAsmOptimizer.LookForPostindexedPattern(p: taicpu) : boolean;
+    var
+      hp1 : tai;
+    begin
+      Result:=false;
+      if (p.oper[1]^.typ = top_ref) and
+        (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
+        (p.oper[1]^.ref^.index=NR_NO) and
+        (p.oper[1]^.ref^.offset=0) and
+        GetNextInstructionUsingReg(p, hp1, p.oper[1]^.ref^.base) and
+        { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
+        MatchInstruction(hp1, [A_ADD, A_SUB], [PF_None]) and
+        (taicpu(hp1).oper[0]^.reg=p.oper[1]^.ref^.base) and
+        (taicpu(hp1).oper[1]^.reg=p.oper[1]^.ref^.base) and
+        (
+         { valid offset? }
+         (taicpu(hp1).oper[2]^.typ=top_const) and
+         (taicpu(hp1).oper[2]^.val>=-256) and
+         (abs(taicpu(hp1).oper[2]^.val)<256)
+        ) and
+        { don't apply the optimization if the base register is loaded }
+        (getsupreg(p.oper[0]^.reg)<>getsupreg(p.oper[1]^.ref^.base)) and
+        not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) and
+        not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
+          p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
+          if taicpu(hp1).opcode=A_ADD then
+            p.oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
+          else
+            p.oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
+          asml.Remove(hp1);
+          hp1.Free;
+          Result:=true;
+        end;
+    end;
+
+
+  function TCpuAsmOptimizer.OptPass1Shift(var p : tai): boolean;
     var
-      next1: tai;
+      hp1,hp2: tai;
+      I2, I: Integer;
+      shifterop: tshifterop;
+    begin
+      Result:=false;
+      { This folds shifterops into following instructions
+        <shiftop> r0, r1, #imm
+        <op> r2, r3, r0
+
+        to
+
+        <op> r2, r3, r1, <shiftop> #imm
+      }
+      { do not handle ROR yet, only part of the instructions below support ROR as shifter operand }
+      if MatchInstruction(p,[A_LSL, A_LSR, A_ASR{, A_ROR}],[PF_None]) and
+         MatchOpType(taicpu(p),top_reg,top_reg,top_const) and
+         GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+         MatchInstruction(hp1, [A_ADD, A_AND, A_BIC, A_CMP, A_CMN,
+                                A_EON, A_EOR, A_NEG, A_ORN, A_ORR,
+                                A_SUB, A_TST], [PF_None]) and
+         RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
+         (taicpu(hp1).ops >= 2) and
+         { Currently we can't fold into another shifterop }
+         (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
+         { SP does not work completely with shifted registers, as I didn't find the exact rules,
+           we do not operate on SP }
+         (taicpu(hp1).oper[0]^.reg<>NR_SP) and
+         (taicpu(hp1).oper[1]^.reg<>NR_SP) and
+         (taicpu(hp1).oper[taicpu(hp1).ops-1]^.reg<>NR_SP) and
+         { reg1 might not be modified inbetween }
+         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
+         (
+           { Only ONE of the two src operands is allowed to match }
+           MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
+           MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
+         ) and
+         { for SUB, the last operand must match, there is no RSB on AArch64 }
+         ((taicpu(hp1).opcode<>A_SUB) or
+          MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)) then
+        begin
+          { for the two operand instructions, start also at the second operand as they are not always commutative
+            (depends on the flags tested laster on) and thus the operands cannot swapped }
+          I2:=1;
+          for I:=I2 to taicpu(hp1).ops-1 do
+            if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
+              begin
+                { If the parameter matched on the second op from the RIGHT
+                  we have to switch the parameters, this will not happen for CMP
+                  were we're only evaluating the most right parameter
+                }
+                shifterop_reset(shifterop);
+                case taicpu(p).opcode of
+                  A_LSL:
+                    shifterop.shiftmode:=SM_LSL;
+                  A_ROR:
+                    shifterop.shiftmode:=SM_ROR;
+                  A_LSR:
+                    shifterop.shiftmode:=SM_LSR;
+                  A_ASR:
+                    shifterop.shiftmode:=SM_ASR;
+                  else
+                    InternalError(2019090401);
+                end;
+                shifterop.shiftimm:=taicpu(p).oper[2]^.val;
+
+                if I <> taicpu(hp1).ops-1 then
+                  begin
+                    if taicpu(hp1).ops = 3 then
+                      hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
+                           taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
+                           taicpu(p).oper[1]^.reg, shifterop)
+                    else
+                      hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
+                           taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
+                           shifterop);
+                  end
+                else
+                  if taicpu(hp1).ops = 3 then
+                    hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
+                         taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
+                         taicpu(p).oper[1]^.reg,shifterop)
+                  else
+                    hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
+                         taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
+                         shifterop);
+
+                taicpu(hp2).fileinfo:=taicpu(hp1).fileinfo;
+                asml.insertbefore(hp2, hp1);
+                GetNextInstruction(p, hp2);
+                asml.remove(p);
+                asml.remove(hp1);
+                p.free;
+                hp1.free;
+                p:=hp2;
+                DebugMsg('Peephole FoldShiftProcess done', p);
+                Result:=true;
+                break;
+              end;
+        end
+      else if MatchInstruction(p,[A_LSL, A_LSR, A_ASR,A_ROR],[PF_None]) and
+        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+        RemoveSuperfluousMove(p, hp1, 'ShiftMov2Shift') then
+        Result:=true;
+    end;
+
+
+  function TCpuAsmOptimizer.OptPass1Data(var p : tai): boolean;
+    var
+      hp1: tai;
+    begin
+      result:=false;
+      if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+        RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
+        Result:=true;
+    end;
+
+
+  function TCpuAsmOptimizer.OptPostCMP(var p : tai): boolean;
+    var
+     hp1,hp2: tai;
+    begin
+      Result:=false;
+      if MatchOpType(taicpu(p),top_reg,top_const) and
+        (taicpu(p).oper[1]^.val=0) and
+        GetNextInstruction(p,hp1) and
+        MatchInstruction(hp1,A_B,[PF_None]) and
+        (taicpu(hp1).condition in [C_EQ,C_NE]) then
+        begin
+          case taicpu(hp1).condition of
+            C_NE:
+              hp2:=taicpu.op_reg_sym_ofs(A_CBNZ,taicpu(p).oper[0]^.reg,taicpu(hp1).oper[0]^.ref^.symbol,taicpu(hp1).oper[0]^.ref^.offset);
+            C_EQ:
+              hp2:=taicpu.op_reg_sym_ofs(A_CBZ,taicpu(p).oper[0]^.reg,taicpu(hp1).oper[0]^.ref^.symbol,taicpu(hp1).oper[0]^.ref^.offset);
+            else
+              Internalerror(2019090801);
+          end;
+          taicpu(hp2).fileinfo:=taicpu(hp1).fileinfo;
+          asml.insertbefore(hp2, hp1);
+
+          asml.remove(p);
+          asml.remove(hp1);
+          p.free;
+          hp1.free;
+          p:=hp2;
+          DebugMsg('Peephole CMPB.E/NE2CBNZ/CBZ done', p);
+          Result:=true;
+        end;
+    end;
+
+
+  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
     begin
       result := false;
+      if p.typ=ait_instruction then
+        begin
+          case taicpu(p).opcode of
+            A_LDR:
+              begin
+                Result:=LookForPostindexedPattern(taicpu(p));
+              end;
+            A_STR:
+              begin
+                Result:=LookForPostindexedPattern(taicpu(p));
+              end;
+            A_LSR,
+            A_ROR,
+            A_ASR,
+            A_LSL:
+              Result:=OptPass1Shift(p);
+            A_ADD,
+            A_ADC,
+            A_SUB,
+            A_SBC,
+            A_AND,
+            A_BIC,
+            A_EOR,
+            A_ORR,
+            A_MUL:
+              Result:=OptPass1Data(p);
+            else
+              ;
+          end;
+        end;
     end;
 
 
-  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
+  function TCpuAsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;
     begin
+      result := false;
+      if p.typ=ait_instruction then
+        begin
+          case taicpu(p).opcode of
+            A_CMP:
+              Result:=OptPostCMP(p);
+            else
+              ;
+          end;
+        end;
     end;
 
 begin

+ 2 - 4
compiler/aarch64/aoptcpub.pas

@@ -76,10 +76,6 @@ Const
 
   MaxCh = 3;
 
-{ the maximum number of operands an instruction has }
-
-  MaxOps = 4;
-
 {Oper index of operand that contains the source (reference) with a load }
 {instruction                                                            }
 
@@ -146,6 +142,8 @@ Implementation
                   exit
                 end;
             end;
+          else
+            ;
         end;
     end;
 

+ 163 - 53
compiler/aarch64/cgcpu.pas

@@ -100,6 +100,8 @@ interface
         procedure g_concatcopy_move(list: TAsmList; const source, dest: treference; len: tcgint);
         procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);override;
         procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint);override;
+        procedure g_check_for_fpu_exception(list: TAsmList; force, clear: boolean);override;
+        procedure g_profilecode(list: TAsmList);override;
        private
         function save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
         procedure load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
@@ -185,7 +187,7 @@ implementation
               href.refaddr:=addr_gotpage;
             list.concat(taicpu.op_reg_ref(A_ADRP,preferred_newbasereg,href));
             { load the GOT entry (= address of the variable) }
-            reference_reset_base(href,preferred_newbasereg,0,sizeof(pint),[]);
+            reference_reset_base(href,preferred_newbasereg,0,ctempposinvalid,sizeof(pint),[]);
             href.symbol:=ref.symbol;
             { code symbols defined in the current compilation unit do not
               have to be accessed via the GOT }
@@ -245,7 +247,7 @@ implementation
                       so.shiftmode:=ref.shiftmode;
                       so.shiftimm:=ref.shiftimm;
                       list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
-                      reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment,ref.volatility);
+                      reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
                       { possibly still an invalid offset -> fall through }
                     end
                   else if ref.offset<>0 then
@@ -291,7 +293,7 @@ implementation
                     end
                   else
                     a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,ref.index,ref.base,preferred_newbasereg);
-                  reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment,ref.volatility);
+                  reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
                   { fall through to the handling of base + offset, since the
                     offset may still be too big }
                 end;
@@ -379,11 +381,9 @@ implementation
                                 a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
                                 ref.offset:=0;
                               end;
-                            reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment,ref.volatility);
+                            reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
                           end;
                       end
-                    else
-                      internalerror(2014110904);
                   end;
                 end;
               A_LDP,A_STP:
@@ -407,7 +407,7 @@ implementation
                             preferred_newbasereg:=getaddressregister(list);
                       end;
                       a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
-                      reference_reset_base(ref,preferred_newbasereg,0,ref.alignment,ref.volatility);
+                      reference_reset_base(ref,preferred_newbasereg,0,ref.temppos,ref.alignment,ref.volatility);
                     end
                 end;
               A_LDUR,A_STUR:
@@ -429,7 +429,7 @@ implementation
         if preferred_newbasereg=NR_NO then
           preferred_newbasereg:=getaddressregister(list);
         a_load_const_reg(list,OS_ADDR,ref.offset,preferred_newbasereg);
-        reference_reset_base(ref,preferred_newbasereg,0,newalignment(8,ref.offset),ref.volatility);
+        reference_reset_base(ref,preferred_newbasereg,0,ref.temppos,newalignment(8,ref.offset),ref.volatility);
       end;
 
 
@@ -702,7 +702,10 @@ implementation
         hreg: tregister;
       begin
         if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
-          fromsize:=tosize
+          begin
+            fromsize:=tosize;
+            reg:=makeregsize(list,reg,fromsize);
+          end
         { have a 32 bit register but need a 64 bit one? }
         else if tosize in [OS_64,OS_S64] then
           begin
@@ -807,35 +810,80 @@ implementation
     procedure tcgaarch64.a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister);
       var
         href: treference;
-        hreg1, hreg2, tmpreg: tregister;
+        hreg1, hreg2, tmpreg,tmpreg2: tregister;
+        i : Integer;
       begin
-        if fromsize in [OS_64,OS_S64] then
-          begin
-            { split into two 32 bit loads }
-            hreg1:=getintregister(list,OS_32);
-            hreg2:=getintregister(list,OS_32);
-            if target_info.endian=endian_big then
-              begin
-                tmpreg:=hreg1;
-                hreg1:=hreg2;
-                hreg2:=tmpreg;
-              end;
-            { can we use LDP? }
-            if (ref.alignment=4) and
-               (simple_ref_type(A_LDP,OS_32,PF_None,ref)=sr_simple) then
-              list.concat(taicpu.op_reg_reg_ref(A_LDP,hreg1,hreg2,ref))
-            else
-              begin
-                a_load_ref_reg(list,OS_32,OS_32,ref,hreg1);
-                href:=ref;
-                inc(href.offset,4);
-                a_load_ref_reg(list,OS_32,OS_32,href,hreg2);
-              end;
-            a_load_reg_reg(list,OS_32,OS_64,hreg1,register);
-            list.concat(taicpu.op_reg_reg_const_const(A_BFI,register,makeregsize(hreg2,OS_64),32,32));
-          end
-       else
-         inherited;
+        case fromsize of
+          OS_64,OS_S64:
+            begin
+              { split into two 32 bit loads }
+              hreg1:=getintregister(list,OS_32);
+              hreg2:=getintregister(list,OS_32);
+              if target_info.endian=endian_big then
+                begin
+                  tmpreg:=hreg1;
+                  hreg1:=hreg2;
+                  hreg2:=tmpreg;
+                end;
+              { can we use LDP? }
+              if (ref.alignment=4) and
+                 (simple_ref_type(A_LDP,OS_32,PF_None,ref)=sr_simple) then
+                list.concat(taicpu.op_reg_reg_ref(A_LDP,hreg1,hreg2,ref))
+              else
+                begin
+                  a_load_ref_reg(list,OS_32,OS_32,ref,hreg1);
+                  href:=ref;
+                  inc(href.offset,4);
+                  a_load_ref_reg(list,OS_32,OS_32,href,hreg2);
+                end;
+              a_load_reg_reg(list,OS_32,OS_64,hreg1,register);
+              list.concat(taicpu.op_reg_reg_const_const(A_BFI,register,makeregsize(hreg2,OS_64),32,32));
+            end;
+          OS_16,OS_S16,
+          OS_32,OS_S32:
+            begin
+              if ref.alignment=2 then
+                begin
+                  href:=ref;
+                  if target_info.endian=endian_big then
+                    inc(href.offset,tcgsize2size[fromsize]-2);
+                  tmpreg:=getintregister(list,OS_32);
+                  a_load_ref_reg(list,OS_16,OS_32,href,tmpreg);
+                  tmpreg2:=getintregister(list,OS_32);
+                  for i:=1 to (tcgsize2size[fromsize]-1) div 2 do
+                    begin
+                      if target_info.endian=endian_big then
+                        dec(href.offset,2)
+                      else
+                        inc(href.offset,2);
+                      a_load_ref_reg(list,OS_16,OS_32,href,tmpreg2);
+                      list.concat(taicpu.op_reg_reg_const_const(A_BFI,tmpreg,tmpreg2,i*16,16));
+                    end;
+                  a_load_reg_reg(list,fromsize,tosize,tmpreg,register);
+                end
+              else
+                begin
+                  href:=ref;
+                  if target_info.endian=endian_big then
+                    inc(href.offset,tcgsize2size[fromsize]-1);
+                  tmpreg:=getintregister(list,OS_32);
+                  a_load_ref_reg(list,OS_8,OS_32,href,tmpreg);
+                  tmpreg2:=getintregister(list,OS_32);
+                  for i:=1 to tcgsize2size[fromsize]-1 do
+                    begin
+                      if target_info.endian=endian_big then
+                        dec(href.offset)
+                      else
+                        inc(href.offset);
+                      a_load_ref_reg(list,OS_8,OS_32,href,tmpreg2);
+                      list.concat(taicpu.op_reg_reg_const_const(A_BFI,tmpreg,tmpreg2,i*8,8));
+                    end;
+                  a_load_reg_reg(list,fromsize,tosize,tmpreg,register);
+                end;
+            end;
+          else
+            inherited;
+        end;
       end;
 
 
@@ -977,6 +1025,7 @@ implementation
             { Notify the register allocator that we have written a move
               instruction so it can try to eliminate it. }
             add_move_instruction(instr);
+            { FMOV cannot generate a floating point exception }
           end
         else
           begin
@@ -984,6 +1033,7 @@ implementation
                (reg_cgsize(reg2)<>tosize) then
               internalerror(2014110913);
             instr:=taicpu.op_reg_reg(A_FCVT,reg2,reg1);
+            maybe_check_for_fpu_exception(list);
           end;
         list.Concat(instr);
       end;
@@ -1037,13 +1087,19 @@ implementation
 
 
      procedure tcgaarch64.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
+       var
+         r : tregister;
        begin
          if not shufflescalar(shuffle) then
            internalerror(2014122802);
          if not(tcgsize2size[fromsize] in [4,8]) or
-            (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then
+            (tcgsize2size[fromsize]>tcgsize2size[tosize]) then
            internalerror(2014122804);
-         list.concat(taicpu.op_reg_reg(A_UMOV,intreg,mmreg));
+         if tcgsize2size[fromsize]<tcgsize2size[tosize] then
+           r:=makeregsize(intreg,fromsize)
+         else
+           r:=intreg;
+         list.concat(taicpu.op_reg_reg(A_UMOV,r,mmreg));
        end;
 
 
@@ -1073,18 +1129,15 @@ implementation
 
     procedure tcgaarch64.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
       var
-        bitsize,
-        signbit: longint;
+        bitsize: longint;
       begin
         if srcsize in [OS_64,OS_S64] then
           begin
             bitsize:=64;
-            signbit:=6;
           end
         else
           begin
             bitsize:=32;
-            signbit:=5;
           end;
         { source is 0 -> dst will have to become 255 }
         list.concat(taicpu.op_reg_const(A_CMP,src,0));
@@ -1254,6 +1307,8 @@ implementation
               a_load_const_reg(list,size,a,dst);
               exit;
             end;
+          else
+            ;
         end;
         case op of
           OP_ADD,
@@ -1402,6 +1457,8 @@ implementation
                     check for overflow) }
                   internalerror(2014122101);
                 end;
+              else
+                internalerror(2019050936);
             end;
           end;
         a_op_reg_reg_reg(list,op,size,src1,src2,dst);
@@ -1522,7 +1579,7 @@ implementation
         pairreg: tregister;
       begin
         result:=0;
-        reference_reset_base(ref,NR_SP,-16,16,[]);
+        reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
         ref.addressmode:=AM_PREINDEXED;
         pairreg:=NR_NO;
         { store all used registers pairwise }
@@ -1570,7 +1627,7 @@ implementation
         localsize:=align(localsize,16);
 
         { save stack pointer and return address }
-        reference_reset_base(ref,NR_SP,-16,16,[]);
+        reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
         ref.addressmode:=AM_PREINDEXED;
         list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
         { initialise frame pointer }
@@ -1646,7 +1703,7 @@ implementation
         pairreg: tregister;
         regcount: longint;
       begin
-        reference_reset_base(ref,NR_SP,16,16,[]);
+        reference_reset_base(ref,NR_SP,16,ctempposinvalid,16,[]);
         ref.addressmode:=AM_POSTINDEXED;
         { highest reg stored twice? }
         regcount:=0;
@@ -1686,7 +1743,14 @@ implementation
         regsstored: boolean;
         sr: tsuperregister;
       begin
-        if not nostackframe then
+        if not(nostackframe) and
+          { we do not need an exit stack frame when we never return
+
+            * the final ret is left so the peephole optimizer can easily do call/ret -> jmp or call conversions
+            * the entry stack frame must be normally generated because the subroutine could be still left by
+              an exception and then the unwinding code might need to restore the registers stored by the entry code
+          }
+          not(po_noreturn in current_procinfo.procdef.procoptions) then
           begin
             { if no registers have been stored, we don't have to subtract the
               allocated temp space from the stack pointer }
@@ -1717,7 +1781,7 @@ implementation
               a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_FP,NR_SP);
 
             { restore framepointer and return address }
-            reference_reset_base(ref,NR_SP,16,16,[]);
+            reference_reset_base(ref,NR_SP,16,ctempposinvalid,16,[]);
             ref.addressmode:=AM_POSTINDEXED;
             list.concat(taicpu.op_reg_reg_ref(A_LDP,NR_FP,NR_LR,ref));
           end;
@@ -1744,9 +1808,9 @@ implementation
         paraloc1.init;
         paraloc2.init;
         paraloc3.init;
-        paramanager.getintparaloc(list,pd,1,paraloc1);
-        paramanager.getintparaloc(list,pd,2,paraloc2);
-        paramanager.getintparaloc(list,pd,3,paraloc3);
+        paramanager.getcgtempparaloc(list,pd,1,paraloc1);
+        paramanager.getcgtempparaloc(list,pd,2,paraloc2);
+        paramanager.getcgtempparaloc(list,pd,3,paraloc3);
         a_load_const_cgpara(list,OS_SINT,len,paraloc3);
         a_loadaddr_ref_cgpara(list,dest,paraloc2);
         a_loadaddr_ref_cgpara(list,source,paraloc1);
@@ -1918,12 +1982,12 @@ implementation
           basereplaced:=true;
           if forcepostindexing then
             begin
-              reference_reset_base(ref,tmpreg,scaledoffset,ref.alignment,ref.volatility);
+              reference_reset_base(ref,tmpreg,scaledoffset,ref.temppos,ref.alignment,ref.volatility);
               ref.addressmode:=AM_POSTINDEXED;
             end
           else
             begin
-              reference_reset_base(ref,tmpreg,0,ref.alignment,ref.volatility);
+              reference_reset_base(ref,tmpreg,0,ref.temppos,ref.alignment,ref.volatility);
               ref.addressmode:=AM_OFFSET;
             end
         end;
@@ -2203,6 +2267,52 @@ implementation
       end;
 
 
+    procedure tcgaarch64.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
+      var
+        r : TRegister;
+        ai: taicpu;
+        l1,l2: TAsmLabel;
+      begin
+        { so far, we assume all flavours of AArch64 need explicit floating point exception checking }
+        if ((cs_check_fpu_exceptions in current_settings.localswitches) and
+            (force or current_procinfo.FPUExceptionCheckNeeded)) then
+          begin
+            r:=getintregister(list,OS_INT);
+            list.concat(taicpu.op_reg_reg(A_MRS,r,NR_FPSR));
+            list.concat(taicpu.op_reg_const(A_TST,r,$1f));
+            current_asmdata.getjumplabel(l1);
+            current_asmdata.getjumplabel(l2);
+            ai:=taicpu.op_sym(A_B,l1);
+            ai.is_jmp:=true;
+            ai.condition:=C_NE;
+            list.concat(ai);
+            list.concat(taicpu.op_reg_const(A_TST,r,$80));
+            ai:=taicpu.op_sym(A_B,l2);
+            ai.is_jmp:=true;
+            ai.condition:=C_EQ;
+            list.concat(ai);
+            a_label(list,l1);
+            alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+            cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
+            dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+            a_label(list,l2);
+            if clear then
+              current_procinfo.FPUExceptionCheckNeeded:=false;
+          end;
+      end;
+
+
+    procedure tcgaarch64.g_profilecode(list : TAsmList);
+      begin
+        if target_info.system = system_aarch64_linux then
+          begin
+            list.concat(taicpu.op_reg_reg(A_MOV,NR_X0,NR_X30));
+            a_call_name(list,'_mcount',false);
+          end
+        else
+          internalerror(2020021901);
+      end;
+
 
     procedure create_codegen;
       begin

+ 76 - 24
compiler/aarch64/cpubase.pas

@@ -48,6 +48,8 @@ unit cpubase;
     type
       TAsmOp= {$i a64op.inc}
 
+      TAsmOps = set of TAsmOp;
+
       { This should define the array of instructions as string }
       op2strtable=array[tasmop] of string[11];
 
@@ -104,7 +106,7 @@ unit cpubase;
       std_param_align = 8;
 
       { TODO: Calculate bsstart}
-      regnumber_count_bsstart = 128;
+      regnumber_count_bsstart = 256;
 
       regnumber_table : array[tregisterindex] of tregister = (
         {$i ra64num.inc}
@@ -121,9 +123,6 @@ unit cpubase;
       VOLATILE_INTREGISTERS = [RS_X0..RS_X18,RS_X30];
       VOLATILE_MMREGISTERS =  [RS_D0..RS_D7,RS_D16..RS_D31];
 
-    type
-      totherregisterset = set of tregisterindex;
-
 {*****************************************************************************
                           Instruction post fixes
 *****************************************************************************}
@@ -200,7 +199,7 @@ unit cpubase;
       tshiftmode = (SM_None,
                     { shifted register instructions. LSL can also be used for
                       the index register of certain loads/stores }
-                    SM_LSL,SM_LSR,SM_ASR,
+                    SM_LSL,SM_LSR,SM_ASR,SM_ROR,
                     { extended register instructions: zero/sign extension +
                         optional shift (interpreted as LSL after extension)
                        -- the index register of certain loads/stores can be
@@ -305,25 +304,6 @@ unit cpubase;
       NR_DEFAULTFLAGS = NR_NZCV;
       RS_DEFAULTFLAGS = RS_NZCV;
 
-{*****************************************************************************
-                       GCC /ABI linking information
-*****************************************************************************}
-
-    const
-      { Registers which must be saved when calling a routine declared as
-        cppdecl, cdecl, stdcall, safecall, palmossyscall. The registers
-        saved should be the ones as defined in the target ABI and / or GCC.
-
-        This value can be deduced from the CALLED_USED_REGISTERS array in the
-        GCC source.
-      }
-      saved_standard_registers : array[0..9] of tsuperregister =
-        (RS_X19,RS_X20,RS_X21,RS_X22,RS_X23,RS_X24,RS_X25,RS_X26,RS_X27,RS_X28);
-      saved_mm_registers : array[0..7] of tsuperregister = (RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15);
-
-      { this is only for the generic code which is not used for this architecture }
-      saved_address_registers : array[0..0] of tsuperregister = (RS_INVALID);
-
 {*****************************************************************************
                                   Helpers
 *****************************************************************************}
@@ -341,11 +321,17 @@ unit cpubase;
     function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function conditions_equal(const c1, c2: TAsmCond): boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal" }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+
     procedure shifterop_reset(var so : tshifterop); {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
     function dwarf_reg(r:tregister):shortint;
+    function dwarf_reg_no_error(r:tregister):shortint;
+    function eh_return_data_regno(nr: longint): longint;
 
     function is_shifter_const(d: aint; size: tcgsize): boolean;
+    function IsFloatImmediate(ft : tfloattype;value : bestreal) : boolean;
 
 
   implementation
@@ -384,8 +370,11 @@ unit cpubase;
           R_MMREGISTER:
             begin
               case s of
+                { records }
+                OS_32,
                 OS_F32:
                   cgsize2subreg:=R_SUBMMS;
+                OS_64,
                 OS_F64:
                   cgsize2subreg:=R_SUBMMD;
                 else
@@ -502,6 +491,26 @@ unit cpubase;
       end;
 
 
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal" }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+      begin
+        Result := (c = C_None) or conditions_equal(Subset, c);
+
+        { Please update as necessary. [Kit] }
+        if not Result then
+          case Subset of
+            C_EQ:
+              Result := (c in [C_GE, C_LE]);
+            C_LT:
+              Result := (c in [C_LE]);
+            C_GT:
+              Result := (c in [C_GE]);
+            else
+              Result := False;
+          end;
+      end;
+
+
     function dwarf_reg(r:tregister):shortint;
       begin
         result:=regdwarf_table[findreg_by_number(r)];
@@ -509,6 +518,10 @@ unit cpubase;
           internalerror(200603251);
       end;
 
+    function dwarf_reg_no_error(r:tregister):shortint;
+      begin
+        result:=regdwarf_table[findreg_by_number(r)];
+      end;
 
     function is_shifter_const(d: aint; size: tcgsize): boolean;
       var
@@ -618,4 +631,43 @@ unit cpubase;
           end;
       end;
 
+
+  function eh_return_data_regno(nr: longint): longint;
+    begin
+      if (nr>=0) and (nr<2) then
+        result:=nr
+      else
+        result:=-1;
+    end;
+
+
+  function IsFloatImmediate(ft : tfloattype;value : bestreal) : boolean;
+    var
+      singlerec : tcompsinglerec;
+      doublerec : tcompdoublerec;
+    begin
+      Result:=false;
+      case ft of
+        s32real:
+          begin
+            singlerec.value:=value;
+            singlerec:=tcompsinglerec(NtoLE(DWord(singlerec)));
+            Result:=(singlerec.bytes[0]=0) and (singlerec.bytes[1]=0) and ((singlerec.bytes[2] and 7)=0)  and
+              (((singlerec.bytes[3] and $7e)=$40) or ((singlerec.bytes[3] and $7e)=$3e));
+          end;
+        s64real:
+          begin
+            doublerec.value:=value;
+            doublerec:=tcompdoublerec(NtoLE(QWord(doublerec)));
+            Result:=(doublerec.bytes[0]=0) and (doublerec.bytes[1]=0) and (doublerec.bytes[2]=0) and
+                    (doublerec.bytes[3]=0) and (doublerec.bytes[4]=0) and (doublerec.bytes[5]=0) and
+                    ((((doublerec.bytes[6] and $c0)=$0) and ((doublerec.bytes[7] and $7f)=$40)) or
+                     (((doublerec.bytes[6] and $c0)=$c0) and ((doublerec.bytes[7] and $7f)=$3f)));
+          end;
+        else
+          ;
+      end;
+    end;
+
+
 end.

+ 8 - 4
compiler/aarch64/cpuinfo.pas

@@ -14,6 +14,8 @@
 
 Unit CPUInfo;
 
+{$i fpcdefs.inc}
+
 Interface
 
   uses
@@ -56,13 +58,15 @@ Type
 
 
 Const
+   fputypestrllvm : array[tfputype] of string[6] = ('',
+     ''
+   );
+
    { Is there support for dealing with multiple microcontrollers available }
    { for this platform? }
    ControllerSupport = false; (* Not yet at least ;-) *)
    {# Size of native extended floating point type }
    extended_size = 8;
-   {# Size of a multimedia register               }
-   mmreg_size = 16;
    { target cpu string (used by compiler options) }
    target_cpu_string = 'aarch64';
 
@@ -108,12 +112,12 @@ Const
                                  genericlevel3optimizerswitches-
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
-                                 [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,
+                                 [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_loopunroll,cs_opt_tailrecursion,
 				  cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
-     [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
+     [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
 

+ 7 - 4
compiler/aarch64/cpunode.pas

@@ -31,11 +31,14 @@ implementation
 
   uses
     ncgbas,ncgflw,ncgcal,ncgcnv,ncgld,ncgmem,ncgcon,ncgset,ncgobjc,
-    ncpuadd,ncpumat,ncpumem,ncpuinl,ncpucnv,ncpuset,
-    { this not really a node }
-    rgcpu,
     { symtable }
     symcpu,
-    aasmdef;
+    aasmdef,
+{$ifndef llvm}
+    ncpuadd,ncpumat,ncpumem,ncpuinl,ncpucnv,ncpuset,ncpucon
+{$else llvm}
+    llvmnode
+{$endif llvm}
+    ;
 
 end.

+ 125 - 85
compiler/aarch64/cpupara.pas

@@ -30,17 +30,19 @@ unit cpupara;
        globtype,globals,
        aasmtai,aasmdata,
        cpuinfo,cpubase,cgbase,cgutils,
-       symconst,symbase,symtype,symdef,parabase,paramgr;
+       symconst,symbase,symtype,symdef,parabase,paramgr,armpara;
 
     type
-       tcpuparamanager = class(tparamanager)
+       tcpuparamanager = class(tarmgenparamanager)
           function get_volatile_registers_int(calloption: tproccalloption): tcpuregisterset; override;
           function get_volatile_registers_fpu(calloption: tproccalloption): tcpuregisterset; override;
           function get_volatile_registers_mm(calloption: tproccalloption): tcpuregisterset; override;
+          function get_saved_registers_int(calloption: tproccalloption): tcpuregisterarray; override;
+          function get_saved_registers_mm(calloption: tproccalloption): tcpuregisterarray; override;
           function push_addr_param(varspez: tvarspez; def: tdef; calloption: tproccalloption): boolean; override;
           function ret_in_param(def: tdef; pd: tabstractprocdef):boolean;override;
           function create_paraloc_info(p: tabstractprocdef; side: tcallercallee):longint;override;
-          function create_varargs_paraloc_info(p: tabstractprocdef; varargspara: tvarargsparalist):longint;override;
+          function create_varargs_paraloc_info(p: tabstractprocdef; side: tcallercallee; varargspara: tvarargsparalist):longint;override;
           function get_funcretloc(p: tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
           function param_use_paraloc(const cgpara: tcgpara): boolean; override;
          private
@@ -50,6 +52,7 @@ unit cpupara;
 
           procedure init_para_alloc_values;
           procedure alloc_para(out result: tcgpara; p: tabstractprocdef; varspez: tvarspez; side: tcallercallee; paradef: tdef; isvariadic, isdelphinestedcc: boolean);
+          function getparaloc(calloption: tproccalloption; p: tdef): tcgloc;
 
           procedure create_paraloc_info_intern(p: tabstractprocdef; side: tcallercallee; paras: tparalist; isvariadic: boolean);
        end;
@@ -87,83 +90,25 @@ unit cpupara;
       end;
 
 
-    function is_hfa_internal(p: tdef; var basedef: tdef; var elecount: longint): boolean;
-      var
-        i: longint;
-        sym: tsym;
-        tmpelecount: longint;
+    function tcpuparamanager.get_saved_registers_int(calloption: tproccalloption): tcpuregisterarray;
+      const
+        saved_regs : {$ifndef VER3_0}tcpuregisterarray{$else}array[0..9] of tsuperregister{$endif} =
+          (RS_X19,RS_X20,RS_X21,RS_X22,RS_X23,RS_X24,RS_X25,RS_X26,RS_X27,RS_X28);
       begin
-        result:=false;
-        case p.typ of
-          arraydef:
-            begin
-              if is_special_array(p) then
-                exit;
-              { an array of empty records has no influence }
-              if tarraydef(p).elementdef.size=0 then
-                begin
-                  result:=true;
-                  exit
-                end;
-              tmpelecount:=0;
-              if not is_hfa_internal(tarraydef(p).elementdef,basedef,tmpelecount) then
-                exit;
-              { tmpelecount now contains the number of hfa elements in a
-                single array element (e.g. 2 if it's an array of a record
-                containing two singles) -> multiply by number of elements
-                in the array }
-              inc(elecount,tarraydef(p).elecount*tmpelecount);
-              if elecount>4 then
-                exit;
-              result:=true;
-            end;
-          floatdef:
-            begin
-              if not assigned(basedef) then
-                basedef:=p
-              else if basedef<>p then
-                exit;
-              inc(elecount);
-              result:=true;
-            end;
-          recorddef:
-            begin
-              for i:=0 to tabstractrecorddef(p).symtable.symlist.count-1 do
-                begin
-                  sym:=tsym(tabstractrecorddef(p).symtable.symlist[i]);
-                  if sym.typ<>fieldvarsym then
-                    continue;
-                  if not is_hfa_internal(tfieldvarsym(sym).vardef,basedef,elecount) then
-                    exit
-                end;
-              result:=true;
-            end;
-          else
-            exit
-        end;
+        result:=saved_regs;
       end;
 
 
-    { Returns whether a def is a "homogeneous float array" at the machine level.
-      This means that in the memory layout, the def only consists of maximally
-      4 floating point values that appear consecutively in memory }
-    function is_hfa(p: tdef; out basedef: tdef) : boolean;
-      var
-        elecount: longint;
+    function tcpuparamanager.get_saved_registers_mm(calloption: tproccalloption): tcpuregisterarray;
+      const
+        saved_mm_regs : {$ifndef VER3_0}tcpuregisterarray{$else}array[0..7] of tsuperregister{$endif} =
+          (RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15);
       begin
-        result:=false;
-        basedef:=nil;
-        elecount:=0;
-        result:=is_hfa_internal(p,basedef,elecount);
-        result:=
-          result and
-          (elecount>0) and
-          (elecount<=4) and
-          (p.size=basedef.size*elecount)
+        result:=saved_mm_regs;
       end;
 
 
-    function getparaloc(calloption: tproccalloption; p: tdef): tcgloc;
+    function tcpuparamanager.getparaloc(calloption: tproccalloption; p: tdef): tcgloc;
       var
         hfabasedef: tdef;
       begin
@@ -251,7 +196,8 @@ unit cpupara;
               then indexed beyond its bounds) }
           arraydef:
             result:=
-              (calloption in cdecl_pocalls) or
+              ((calloption in cdecl_pocalls) and
+               not is_dynamic_array(def)) or
               is_open_array(def) or
               is_array_of_const(def) or
               is_array_constructor(def) or
@@ -263,6 +209,8 @@ unit cpupara;
             result:=def.size>16;
           stringdef :
             result:=tstringdef(def).stringtype in [st_shortstring,st_longstring];
+          else
+            ;
         end;
       end;
 
@@ -344,6 +292,26 @@ unit cpupara;
          if not assigned(result.location) or
             not(result.location^.loc in [LOC_REGISTER,LOC_MMREGISTER,LOC_VOID]) then
            internalerror(2014113001);
+{$ifndef llvm}
+         {
+           According to ARM64 ABI: "If the size of the argument is less than 8 bytes then
+           the size of the argument is set to 8 bytes. The effect is as if the argument
+           was copied to the least significant bits of a 64-bit register and the remaining
+           bits filled with unspecified values."
+
+           Therefore at caller side force the ordinal result to be always 64-bit, so it
+           will be stripped to the required size and uneeded bits are discarded.
+
+           This is not required for iOS, where the result is zero/sign extended.
+         }
+         if (target_info.abi<>abi_aarch64_darwin) and
+            (side=callerside) and (result.location^.loc = LOC_REGISTER) and
+            (result.def.size<8) and is_ordinal(result.def) then
+           begin
+             result.location^.size:=OS_64;
+             result.location^.def:=u64inttype;
+           end;
+{$endif}
       end;
 
 
@@ -381,11 +349,16 @@ unit cpupara;
         if (p.proccalloption in cstylearrayofconst) and
            is_array_of_const(paradef) then
           begin
+            result.size:=OS_NO;
+            result.def:=paradef;
+            result.alignment:=std_param_align;
+            result.intsize:=0;
             paraloc:=result.add_location;
             { hack: the paraloc must be valid, but is not actually used }
             paraloc^.loc:=LOC_REGISTER;
             paraloc^.register:=NR_X0;
             paraloc^.size:=OS_ADDR;
+            paraloc^.def:=paradef;
             exit;
           end;
 
@@ -403,7 +376,7 @@ unit cpupara;
             else
               paralen:=tcgsize2size[def_cgsize(paradef)];
             loc:=getparaloc(p.proccalloption,paradef);
-            if (paradef.typ in [objectdef,arraydef,recorddef]) and
+            if (paradef.typ in [objectdef,arraydef,recorddef,setdef]) and
                not is_special_array(paradef) and
                (varspez in [vs_value,vs_const]) then
               paracgsize:=int_cgsize(paralen)
@@ -472,6 +445,8 @@ unit cpupara;
                    loc:=LOC_REFERENCE;
                  end;
              end;
+           else
+             ;
          end;
 
          { allocate registers/stack locations }
@@ -515,6 +490,44 @@ unit cpupara;
              begin
                paraloc^.size:=locsize;
                paraloc^.def:=locdef;
+{$ifdef llvm}
+               if not is_ordinal(paradef) then
+                 begin
+                   case locsize of
+                     OS_8,OS_16,OS_32:
+                       begin
+                         paraloc^.size:=OS_64;
+                         paraloc^.def:=u64inttype;
+                       end;
+                     OS_S8,OS_S16,OS_S32:
+                       begin
+                         paraloc^.size:=OS_S64;
+                         paraloc^.def:=s64inttype;
+                       end;
+                     OS_F32:
+                       begin
+                         paraloc^.size:=OS_F32;
+                         paraloc^.def:=s32floattype;
+                       end;
+                     OS_F64:
+                       begin
+                         paraloc^.size:=OS_F64;
+                         paraloc^.def:=s64floattype;
+                       end;
+                     else
+                       begin
+                         if is_record(locdef) or
+                            is_set(locdef) or
+                            ((locdef.typ=arraydef) and
+                             not is_special_array(locdef)) then
+                           begin
+                             paraloc^.size:=OS_64;
+                             paraloc^.def:=u64inttype;
+                           end
+                       end;
+                   end;
+                 end;
+{$endif llvm}
              end;
 
            { paraloc loc }
@@ -532,12 +545,31 @@ unit cpupara;
                     responsibility to sign or zero-extend arguments having fewer
                     than 32 bits, and that unused bits in a register are
                     unspecified. In iOS, however, the caller must perform such
-                    extensions, up to 32 bits." }
-                 if (target_info.abi=abi_aarch64_darwin) and
-                    (side=callerside) and
-                    is_ordinal(paradef) and
-                    (paradef.size<4) then
-                   paraloc^.size:=OS_32;
+                    extensions, up to 32 bits."
+                    Zero extend an argument at caller side for iOS and
+                    ignore the argument's unspecified high bits at callee side for
+                    all other platforms. }
+                 if (paradef.size<4) and is_ordinal(paradef) then
+                   begin
+                     if target_info.abi=abi_aarch64_darwin then
+                       begin
+                         if side=callerside then
+                           begin
+                             paraloc^.size:=OS_32;
+                             paraloc^.def:=u32inttype;
+                           end;
+                       end
+{$ifndef llvm}
+                     else
+                       begin
+                         if side=calleeside then
+                           begin
+                             paraloc^.size:=OS_32;
+                             paraloc^.def:=u32inttype;
+                           end;
+                       end;
+{$endif llvm}
+                   end;
 
                  { in case it's a composite, "The argument is passed as though
                    it had been loaded into the registers from a double-word-
@@ -548,7 +580,7 @@ unit cpupara;
                  if (target_info.endian=endian_big) and
                     not(paraloc^.size in [OS_64,OS_S64]) and
                     (paradef.typ in [setdef,recorddef,arraydef,objectdef]) then
-                   paraloc^.shiftval:=-(8-tcgsize2size[paraloc^.size]);
+                   paraloc^.shiftval:=-(8-tcgsize2size[paraloc^.size])*8;
                end;
              LOC_MMREGISTER:
                begin
@@ -562,7 +594,7 @@ unit cpupara;
                   paraloc^.loc:=LOC_REFERENCE;
 
                   { the current stack offset may not be properly aligned in
-                    case we're on Darwin have allocated a non-variadic argument
+                    case we're on Darwin and have allocated a non-variadic argument
                     < 8 bytes previously }
                   if target_info.abi=abi_aarch64_darwin then
                     curstackoffset:=align(curstackoffset,paraloc^.def.alignment);
@@ -614,12 +646,12 @@ unit cpupara;
      end;
 
 
-    function tcpuparamanager.create_varargs_paraloc_info(p: tabstractprocdef; varargspara: tvarargsparalist):longint;
+    function tcpuparamanager.create_varargs_paraloc_info(p: tabstractprocdef; side: tcallercallee; varargspara: tvarargsparalist):longint;
       begin
         init_para_alloc_values;
 
         { non-variadic parameters }
-        create_paraloc_info_intern(p,callerside,p.paras,false);
+        create_paraloc_info_intern(p,side,p.paras,false);
         if p.proccalloption in cstylearrayofconst then
           begin
             { on Darwin, we cannot use any registers for variadic parameters }
@@ -629,11 +661,19 @@ unit cpupara;
                 curmmreg:=succ(RS_LAST_MM_PARAM_SUPREG);
               end;
             { continue loading the parameters  }
-            create_paraloc_info_intern(p,callerside,varargspara,true);
+            if assigned(varargspara) then
+              begin
+                if side=callerside then
+                  create_paraloc_info_intern(p,side,varargspara,true)
+                else
+                  internalerror(2019021916);
+              end;
             result:=curstackoffset;
           end
         else
           internalerror(200410231);
+
+        create_funcretloc_info(p,side);
       end;
 
 begin

+ 3 - 0
compiler/aarch64/cputarg.pas

@@ -41,6 +41,9 @@ implementation
     {$ifndef NOTARGETBSD}
       ,t_bsd
     {$endif}
+    {$ifndef NOTARGETANDROID}
+      ,t_android
+    {$endif}
 
 {**************************************
              Assemblers

+ 11 - 7
compiler/aarch64/hlcgcpu.pas

@@ -45,8 +45,6 @@ interface
       procedure a_load_regconst_subsetreg_intern(list: TAsmList; fromsize, subsetsize: tdef; fromreg: tregister; const sreg: tsubsetregister; slopt: tsubsetloadopt); override;
     end;
 
-  procedure create_hlcodegen;
-
 implementation
 
   uses
@@ -64,7 +62,10 @@ implementation
     begin
       tocgsize:=def_cgsize(tosize);
       if (sreg.startbit<>0) or
-         not(sreg.bitlen in [32,64]) then
+         not((sreg.subsetregsize in [OS_32,OS_S32]) and
+             (sreg.bitlen=32)) or
+         not((sreg.subsetregsize in [OS_64,OS_S64]) and
+             (sreg.bitlen=64)) then
         begin
           if is_signed(subsetsize) then
             op:=A_SBFX
@@ -160,7 +161,7 @@ implementation
       if make_global then
         list.concat(Tai_symbol.Createname_global(labelname,AT_FUNCTION,0,procdef))
       else
-        list.concat(Tai_symbol.Createname(labelname,AT_FUNCTION,0,procdef));
+        list.concat(Tai_symbol.Createname_hidden(labelname,AT_FUNCTION,0,procdef));
 
       { set param1 interface to self  }
       procdef.init_paraloc_info(callerside);
@@ -185,11 +186,11 @@ implementation
           if (procdef.extnumber=$ffff) then
             Internalerror(200006139);
           { mov  0(%rdi),%rax ; load vmt}
-          reference_reset_base(href,voidpointertype,paraloc^.register,0,sizeof(pint),[]);
+          reference_reset_base(href,voidpointertype,paraloc^.register,0,ctempposinvalid,sizeof(pint),[]);
           getcpuregister(list,NR_IP0);
           a_load_ref_reg(list,voidpointertype,voidpointertype,href,NR_IP0);
           { jmp *vmtoffs(%eax) ; method offs }
-          reference_reset_base(href,voidpointertype,NR_IP0,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),sizeof(pint),[]);
+          reference_reset_base(href,voidpointertype,NR_IP0,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),ctempposinvalid,sizeof(pint),[]);
           op:=A_LDR;
           tcgaarch64(cg).make_simple_ref(list,op,OS_ADDR,PF_None,href,NR_IP0);
           list.concat(taicpu.op_reg_ref(op,NR_IP0,href));
@@ -219,11 +220,14 @@ implementation
     end;
 
 
-  procedure create_hlcodegen;
+  procedure create_hlcodegen_cpu;
     begin
       hlcg:=thlcgaarch64.create;
       create_codegen;
     end;
 
 
+begin
+  chlcgobj:=thlcgaarch64;
+  create_hlcodegen:=@create_hlcodegen_cpu;
 end.

+ 9 - 0
compiler/aarch64/ncpuadd.pas

@@ -34,6 +34,7 @@ interface
           function  GetResFlags(unsigned:Boolean):TResFlags;
           function  GetFPUResFlags:TResFlags;
        protected
+          function use_fma : boolean;override;
           procedure second_addfloat;override;
           procedure second_cmpfloat;override;
           procedure second_cmpboolean;override;
@@ -62,6 +63,12 @@ interface
                                taarch64addnode
 *****************************************************************************}
 
+    function taarch64addnode.use_fma : boolean;
+      begin
+        Result:=true;
+      end;
+
+
     function taarch64addnode.GetResFlags(unsigned:Boolean):TResFlags;
       begin
         case NodeType of
@@ -211,6 +218,7 @@ interface
 
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,
            location.register,left.location.register,right.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
 
 
@@ -231,6 +239,7 @@ interface
         { signalling compare so we can get exceptions }
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCMPE,
              left.location.register,right.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
 
 

+ 0 - 1
compiler/aarch64/ncpucnv.pas

@@ -142,7 +142,6 @@ implementation
   procedure taarch64typeconvnode.second_int_to_bool;
     var
       resflags: tresflags;
-      hlabel: tasmlabel;
     begin
       if (nf_explicit in flags) and
          not(left.expectloc in [LOC_FLAGS,LOC_JUMP]) then

+ 90 - 0
compiler/aarch64/ncpucon.pas

@@ -0,0 +1,90 @@
+{
+    Copyright (c) 2005 by Florian Klaempfl
+
+    Code generation for const nodes on the AArch64
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpucon;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      node,ncgcon,cpubase;
+
+    type
+      taarch64realconstnode = class(tcgrealconstnode)
+        function pass_1 : tnode;override;
+        procedure pass_generate_code;override;
+      end;
+
+  implementation
+
+    uses
+      verbose,
+      globtype,globals,
+      cpuinfo,
+      aasmbase,aasmtai,aasmdata,aasmcpu,
+      symdef,
+      defutil,
+      cgbase,cgutils,cgobj,
+      procinfo,
+      ncon;
+
+{*****************************************************************************
+                           TARMREALCONSTNODE
+*****************************************************************************}
+
+    function taarch64realconstnode.pass_1 : tnode;
+      begin
+        result:=nil;
+        if IsFloatImmediate(tfloatdef(resultdef).floattype,value_real) then
+           expectloc:=LOC_MMREGISTER
+         else
+           result:=Inherited pass_1;
+      end;
+
+
+    procedure taarch64realconstnode.pass_generate_code;
+      var
+        hreg : TRegister;
+      begin
+        if IsFloatImmediate(tfloatdef(resultdef).floattype,value_real) then
+          begin
+            location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+            location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+            current_asmdata.CurrAsmList.concat(taicpu.op_reg_realconst(A_FMOV,
+              location.register,value_real));
+          end
+        { cast and compare the bit pattern as we cannot handle -0.0 }
+        else if bestrealrec(value_real).Data=0 then
+          begin
+            location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+            location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+            hreg:=newreg(R_MMREGISTER,getsupreg(location.register),R_SUBMM16B);
+            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_EOR,
+              hreg,hreg,hreg));
+          end
+        else
+          Inherited pass_generate_code;
+      end;
+
+begin
+  crealconstnode:=taarch64realconstnode;
+end.

+ 94 - 1
compiler/aarch64/ncpuinl.pas

@@ -35,6 +35,7 @@ interface
         function first_sqrt_real: tnode; override;
         function first_round_real: tnode; override;
         function first_trunc_real: tnode; override;
+        function first_fma : tnode; override;
         procedure second_abs_real; override;
         procedure second_sqr_real; override;
         procedure second_sqrt_real; override;
@@ -42,6 +43,7 @@ interface
         procedure second_round_real; override;
         procedure second_trunc_real; override;
         procedure second_get_frame; override;
+        procedure second_fma; override;
       private
         procedure load_fpu_location;
       end;
@@ -53,6 +55,7 @@ implementation
       globtype,verbose,globals,
       cpuinfo, defutil,symdef,aasmdata,aasmcpu,
       cgbase,cgutils,pass_1,pass_2,
+      ncal,
       cpubase,ncgutil,cgobj,cgcpu, hlcgobj;
 
 {*****************************************************************************
@@ -104,10 +107,22 @@ implementation
       end;
 
 
+     function taarch64inlinenode.first_fma : tnode;
+       begin
+         if ((is_double(resultdef)) or (is_single(resultdef))) then
+           begin
+             expectloc:=LOC_MMREGISTER;
+             Result:=nil;
+           end
+         else
+           Result:=inherited first_fma;
+       end;
+
     procedure taarch64inlinenode.second_abs_real;
       begin
         load_fpu_location;
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FABS,location.register,left.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
 
 
@@ -115,6 +130,7 @@ implementation
       begin
         load_fpu_location;
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_FMUL,location.register,left.location.register,left.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
 
 
@@ -122,13 +138,13 @@ implementation
       begin
         load_fpu_location;
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FSQRT,location.register,left.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
 
 
     procedure taarch64inlinenode.second_abs_long;
       var
         opsize : tcgsize;
-        hp : taicpu;
       begin
         secondpass(left);
         opsize:=def_cgsize(left.resultdef);
@@ -155,6 +171,7 @@ implementation
         { convert to signed integer rounding towards zero (there's no "round to
           integer using current rounding mode") }
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCVTZS,location.register,hreg));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
 
 
@@ -179,6 +196,82 @@ implementation
         location.register:=NR_FRAME_POINTER_REG;
       end;
 
+
+    procedure taarch64inlinenode.second_fma;
+      const
+        op : array[false..true,false..true] of TAsmOp =
+          { positive product }
+          (
+           { positive third operand }
+           (A_FMADD,
+           { negative third operand }
+            A_FNMSUB),
+           { negative product }
+            { positive third operand }
+            (A_FMSUB,
+             A_FNMADD)
+           );
+
+      var
+        paraarray : array[1..3] of tnode;
+        i : integer;
+        negop3,
+        negproduct : boolean;
+      begin
+        negop3:=false;
+        negproduct:=false;
+        paraarray[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;
+        paraarray[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
+        paraarray[3]:=tcallparanode(parameters).paravalue;
+
+        { check if a neg. node can be removed
+          this is possible because changing the sign of
+          a floating point number does not affect its absolute
+          value in any way
+        }
+        if paraarray[1].nodetype=unaryminusn then
+          begin
+            paraarray[1]:=tunarynode(paraarray[1]).left;
+            { do not release the unused unary minus node, it is kept and release together with the other nodes,
+              only no code is generated for it }
+            negproduct:=not(negproduct);
+          end;
+
+        if paraarray[2].nodetype=unaryminusn then
+          begin
+            paraarray[2]:=tunarynode(paraarray[2]).left;
+            { do not release the unused unary minus node, it is kept and release together with the other nodes,
+              only no code is generated for it }
+            negproduct:=not(negproduct);
+          end;
+
+        if paraarray[3].nodetype=unaryminusn then
+          begin
+            paraarray[3]:=tunarynode(paraarray[3]).left;
+            { do not release the unused unary minus node, it is kept and release together with the other nodes,
+              only no code is generated for it }
+            negop3:=true;
+          end;
+
+         for i:=1 to 3 do
+          secondpass(paraarray[i]);
+
+        { no memory operand is allowed }
+        for i:=1 to 3 do
+          begin
+            if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
+          end;
+
+        location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
+        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_reg(op[negproduct,negop3],
+          location.register,paraarray[1].location.register,paraarray[2].location.register,paraarray[3].location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+      end;
+
+
 begin
   cinlinenode:=taarch64inlinenode;
 end.

+ 75 - 9
compiler/aarch64/ncpumat.pas

@@ -76,9 +76,58 @@ implementation
          resultreg  : tregister;
          hl : tasmlabel;
          overflowloc: tlocation;
+         power: longint;
+
+       procedure genOrdConstNodeDiv;
+         var
+           helper1, helper2: TRegister;
+           so: tshifterop;
+         begin
+           if tordconstnode(right).value=0 then
+             internalerror(2020021601)
+           else if tordconstnode(right).value=1 then
+             cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, numerator, resultreg)
+           else if (tordconstnode(right).value = int64(-1)) then
+             begin
+               // note: only in the signed case possible..., may overflow
+               if cs_check_overflow in current_settings.localswitches then
+                 cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+
+               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_NEG,
+                 resultreg,numerator),toppostfix(ord(cs_check_overflow in current_settings.localswitches)*ord(PF_S))));
+             end
+           else if ispowerof2(tordconstnode(right).value,power) then
+             begin
+               if (is_signed(right.resultdef)) then
+                 begin
+                    helper2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
+                    if power = 1 then
+                      helper1:=numerator
+                    else
+                      begin
+                        helper1:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
+                        cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,63,numerator,helper1);
+                      end;
+                    shifterop_reset(so);
+                    so.shiftmode:=SM_LSR;
+                    so.shiftimm:=64-power;
+                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,helper2,numerator,helper1,so));
+                    cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,power,helper2,resultreg);
+                  end
+               else
+                 cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_INT,power,numerator,resultreg)
+             end
+           else
+             { Everything else is handled in the generic code }
+             cg.g_div_const_reg_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),
+               tordconstnode(right).value.svalue,numerator,resultreg);
+         end;
+
       begin
        secondpass(left);
        secondpass(right);
+       { avoid warning }
+       divider:=NR_NO;
 
        { set result location }
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
@@ -89,16 +138,32 @@ implementation
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
        numerator:=left.location.register;
 
-       { load divider in a register }
-       hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
-       divider:=right.location.register;
-
-       { start division }
-       if is_signed(left.resultdef) then
-         op:=A_SDIV
+       if (right.nodetype=ordconstn) and
+          ((tordconstnode(right).value=1) or
+           (tordconstnode(right).value=int64(-1)) or
+           (tordconstnode(right).value=0) or
+           ispowerof2(tordconstnode(right).value,power)) then
+         begin
+           genOrdConstNodeDiv;
+           if nodetype=modn then
+             begin
+               divider:=cg.getintregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
+               cg.a_load_const_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),int64(tordconstnode(right).value),divider);
+             end;
+         end
        else
-         op:=A_UDIV;
-       current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,location.register,numerator,divider));
+         begin
+           { load divider in a register }
+           hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
+           divider:=right.location.register;
+
+           { start division }
+           if is_signed(left.resultdef) then
+             op:=A_SDIV
+           else
+             op:=A_UDIV;
+           current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,location.register,numerator,divider));
+         end;
 
        { no divide-by-zero detection available in hardware, emulate (if it's a
          constant, this will have been detected earlier already) }
@@ -187,6 +252,7 @@ implementation
         location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
         location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FNEG,location.register,left.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
 
 begin

+ 1 - 1
compiler/aarch64/ncpumem.pas

@@ -113,7 +113,7 @@ implementation
           location.reference.offset:=0;
           base:=cg.getaddressregister(current_asmdata.CurrAsmList);
           cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,location.reference,base);
-          reference_reset_base(location.reference,base,oldoffset,location.reference.alignment,location.reference.volatility);
+          reference_reset_base(location.reference,base,oldoffset,location.reference.temppos,location.reference.alignment,location.reference.volatility);
         end;
       shift:=BsfDWord(l);
       location.reference.index:=maybe_const_reg;

+ 121 - 6
compiler/aarch64/ncpuset.pas

@@ -31,9 +31,10 @@ interface
     type
        taarch64casenode = class(tcgcasenode)
          protected
-           procedure optimizevalues(var max_linear_list: aint; var max_dist: aword);override;
+           procedure optimizevalues(var max_linear_list: int64; var max_dist: qword);override;
            function  has_jumptable: boolean;override;
-           procedure genjumptable(hp: pcaselabel ;min_, max_: aint);override;
+           procedure genjumptable(hp: pcaselabel ;min_, max_: int64);override;
+           procedure genlinearlist(hp: pcaselabel);override;
        end;
 
 
@@ -56,7 +57,7 @@ implementation
 *****************************************************************************}
 
 
-    procedure taarch64casenode.optimizevalues(var max_linear_list: aint; var max_dist: aword);
+    procedure taarch64casenode.optimizevalues(var max_linear_list: int64; var max_dist: qword);
       begin
         max_linear_list:=10;
       end;
@@ -68,7 +69,121 @@ implementation
       end;
 
 
-    procedure taarch64casenode.genjumptable(hp: pcaselabel; min_, max_: aint);
+    procedure taarch64casenode.genlinearlist(hp : pcaselabel);
+      var
+        first : boolean;
+        lastrange : boolean;
+        last : TConstExprInt;
+        cond_lt,cond_le : tresflags;
+        opcgsize, unsigned_opcgsize: tcgsize;
+
+        procedure genitem(t : pcaselabel);
+          var
+           ovloc: tlocation;
+          begin
+            if assigned(t^.less) then
+              genitem(t^.less);
+            { need we to test the first value }
+            if first and (t^._low>get_min_value(left.resultdef)) then
+              begin
+                cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opcgsize,jmp_lt,aint(t^._low.svalue),hregister,elselabel);
+              end;
+            if t^._low=t^._high then
+              begin
+                 if t^._low-last=0 then
+                   cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList, opcgsize, OC_EQ,0,hregister,blocklabel(t^.blockid))
+                 else
+                   begin
+                     { use unsigned_opcgsize here to avoid uncessary sign extensions, at this place hregister will never be negative, because
+                       then genlinearlist wouldn't be used }
+                     cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList, OP_SUB, unsigned_opcgsize, aint(t^._low.svalue-last.svalue), hregister, hregister,
+                       true,ovloc);
+                     cg.a_jmp_flags(current_asmdata.CurrAsmList,F_EQ,blocklabel(t^.blockid));
+                   end;
+                 last:=t^._low;
+                 lastrange:=false;
+              end
+            else
+              begin
+                 { it begins with the smallest label, if the value }
+                 { is even smaller then jump immediately to the    }
+                 { ELSE-label                                }
+                 if first then
+                   begin
+                      { have we to ajust the first value ? }
+                      if (t^._low>get_min_value(left.resultdef)) or (get_min_value(left.resultdef)<>0) then
+                        begin
+                          { use unsigned_opcgsize here to avoid uncessary sign extensions, at this place hregister will never be negative, because
+                            then genlinearlist wouldn't be use }
+                          cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList, OP_SUB, unsigned_opcgsize, aint(t^._low.svalue), hregister, hregister,
+                            true,ovloc);
+                        end;
+                   end
+                 else
+                   begin
+                     { if there is no unused label between the last and the }
+                     { present label then the lower limit can be checked    }
+                     { immediately. else check the range in between:       }
+
+                     { use unsigned_opcgsize here to avoid uncessary sign extensions, at this place hregister will never be negative, because
+                       then genlinearlist wouldn't be use }
+                     cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList, OP_SUB, unsigned_opcgsize, aint(t^._low.svalue - last.svalue), hregister, hregister,
+                       true,ovloc);
+                     { no jump necessary here if the new range starts at }
+                     { at the value following the previous one           }
+                     if (aint(t^._low.svalue - last.svalue) <> 1) or
+                        (not lastrange) then
+                       cg.a_jmp_flags(current_asmdata.CurrAsmList,cond_lt,elselabel);
+                   end;
+                 { use unsigned_opcgsize here to avoid uncessary sign extensions, at this place hregister will never be negative, because
+                   then genlinearlist wouldn't be use }
+                 cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,unsigned_opcgsize,aint(t^._high.svalue - t^._low.svalue), hregister, hregister,
+                   true,ovloc);
+                 cg.a_jmp_flags(current_asmdata.CurrAsmList,cond_le,blocklabel(t^.blockid));
+
+                 last:=t^._high;
+                 lastrange:=true;
+              end;
+            first:=false;
+            if assigned(t^.greater) then
+              genitem(t^.greater);
+          end;
+
+        begin
+           opcgsize:=def_cgsize(opsize);
+           case opcgsize of
+             OS_8,OS_16,OS_32,OS_S8,OS_S16,OS_S32:
+               unsigned_opcgsize:=OS_32;
+             OS_64,OS_S64:
+               unsigned_opcgsize:=OS_64;
+             else
+               Internalerror(2019090902);
+           end;
+           if with_sign then
+             begin
+                cond_lt:=F_LT;
+                cond_le:=F_LE;
+             end
+           else
+              begin
+                cond_lt:=F_CC;
+                cond_le:=F_LS;
+             end;
+           { do we need to generate cmps? }
+           if (with_sign and (min_label<0)) then
+             genlinearcmplist(hp)
+           else
+             begin
+                last:=0;
+                lastrange:=false;
+                first:=true;
+                genitem(hp);
+                cg.a_jmp_always(current_asmdata.CurrAsmList,elselabel);
+             end;
+        end;
+
+
+    procedure taarch64casenode.genjumptable(hp: pcaselabel; min_, max_: int64);
       var
         last: TConstExprInt;
         tablelabel: TAsmLabel;
@@ -80,7 +195,7 @@ implementation
 
       procedure genitem(list:TAsmList;t : pcaselabel);
         var
-          i : aint;
+          i : int64;
         begin
           if assigned(t^.less) then
             genitem(list,t^.less);
@@ -128,7 +243,7 @@ implementation
         basereg:=cg.getaddressregister(current_asmdata.CurrAsmList);
         cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,basereg);
         { load table slot, 32-bit sign extended }
-        reference_reset_base(href,basereg,0,4,[]);
+        reference_reset_base(href,basereg,0,href.temppos,4,[]);
         href.index:=indexreg;
         href.shiftmode:=SM_LSL;
         href.shiftimm:=2;

+ 64 - 0
compiler/aarch64/ra64con.inc

@@ -71,161 +71,225 @@ NR_H0 = tregister($04030000);
 NR_S0 = tregister($04090000);
 NR_D0 = tregister($040a0000);
 NR_Q0 = tregister($04050000);
+NR_V08B = tregister($04170000);
+NR_V016B = tregister($04180000);
 NR_B1 = tregister($04010001);
 NR_H1 = tregister($04030001);
 NR_S1 = tregister($04090001);
 NR_D1 = tregister($040a0001);
 NR_Q1 = tregister($04050001);
+NR_V18B = tregister($04170001);
+NR_V116B = tregister($04180001);
 NR_B2 = tregister($04010002);
 NR_H2 = tregister($04030002);
 NR_S2 = tregister($04090002);
 NR_D2 = tregister($040a0002);
 NR_Q2 = tregister($04050002);
+NR_V28B = tregister($04170002);
+NR_V216B = tregister($04180002);
 NR_B3 = tregister($04010003);
 NR_H3 = tregister($04030003);
 NR_S3 = tregister($04090003);
 NR_D3 = tregister($040a0003);
 NR_Q3 = tregister($04050003);
+NR_V38B = tregister($04170003);
+NR_V316B = tregister($04180003);
 NR_B4 = tregister($04010004);
 NR_H4 = tregister($04030004);
 NR_S4 = tregister($04090004);
 NR_D4 = tregister($040a0004);
 NR_Q4 = tregister($04050004);
+NR_V48B = tregister($04170004);
+NR_V416B = tregister($04180004);
 NR_B5 = tregister($04010005);
 NR_H5 = tregister($04030005);
 NR_S5 = tregister($04090005);
 NR_D5 = tregister($040a0005);
 NR_Q5 = tregister($04050005);
+NR_V58B = tregister($04170005);
+NR_V516B = tregister($04180005);
 NR_B6 = tregister($04010006);
 NR_H6 = tregister($04030006);
 NR_S6 = tregister($04090006);
 NR_D6 = tregister($040a0006);
 NR_Q6 = tregister($04050006);
+NR_V68B = tregister($04170006);
+NR_V616B = tregister($04180006);
 NR_B7 = tregister($04010007);
 NR_H7 = tregister($04030007);
 NR_S7 = tregister($04090007);
 NR_D7 = tregister($040a0007);
 NR_Q7 = tregister($04050007);
+NR_V78B = tregister($04170007);
+NR_V716B = tregister($04180007);
 NR_B8 = tregister($04010008);
 NR_H8 = tregister($04030008);
 NR_S8 = tregister($04090008);
 NR_D8 = tregister($040a0008);
 NR_Q8 = tregister($04050008);
+NR_V88B = tregister($04170008);
+NR_V816B = tregister($04180008);
 NR_B9 = tregister($04010009);
 NR_H9 = tregister($04030009);
 NR_S9 = tregister($04090009);
 NR_D9 = tregister($040a0009);
 NR_Q9 = tregister($04050009);
+NR_V98B = tregister($04170009);
+NR_V916B = tregister($04180009);
 NR_B10 = tregister($0401000A);
 NR_H10 = tregister($0403000A);
 NR_S10 = tregister($0409000A);
 NR_D10 = tregister($040a000A);
 NR_Q10 = tregister($0405000A);
+NR_V108B = tregister($0417000A);
+NR_V1016B = tregister($0418000A);
 NR_B11 = tregister($0401000B);
 NR_H11 = tregister($0403000B);
 NR_S11 = tregister($0409000B);
 NR_D11 = tregister($040a000B);
 NR_Q11 = tregister($0405000B);
+NR_V118B = tregister($0417000B);
+NR_V1116B = tregister($0418000B);
 NR_B12 = tregister($0401000C);
 NR_H12 = tregister($0403000C);
 NR_S12 = tregister($0409000C);
 NR_D12 = tregister($040a000C);
 NR_Q12 = tregister($0405000C);
+NR_V128B = tregister($0417000C);
+NR_V1216B = tregister($0418000C);
 NR_B13 = tregister($0401000D);
 NR_H13 = tregister($0403000D);
 NR_S13 = tregister($0409000D);
 NR_D13 = tregister($040a000D);
 NR_Q13 = tregister($0405000D);
+NR_V138B = tregister($0417000D);
+NR_V1316B = tregister($0418000D);
 NR_B14 = tregister($0401000E);
 NR_H14 = tregister($0403000E);
 NR_S14 = tregister($0409000E);
 NR_D14 = tregister($040a000E);
 NR_Q14 = tregister($0405000E);
+NR_V148B = tregister($0417000E);
+NR_V1416B = tregister($0418000E);
 NR_B15 = tregister($0401000F);
 NR_H15 = tregister($0403000F);
 NR_S15 = tregister($0409000F);
 NR_D15 = tregister($040a000F);
 NR_Q15 = tregister($0405000F);
+NR_V158B = tregister($0417000F);
+NR_V1516B = tregister($0418000F);
 NR_B16 = tregister($04010010);
 NR_H16 = tregister($04030010);
 NR_S16 = tregister($04090010);
 NR_D16 = tregister($040a0010);
 NR_Q16 = tregister($04050010);
+NR_V168B = tregister($04170010);
+NR_V1616B = tregister($04180010);
 NR_B17 = tregister($04010011);
 NR_H17 = tregister($04030011);
 NR_S17 = tregister($04090011);
 NR_D17 = tregister($040a0011);
 NR_Q17 = tregister($04050011);
+NR_V178B = tregister($04170011);
+NR_V1716B = tregister($04180011);
 NR_B18 = tregister($04010012);
 NR_H18 = tregister($04030012);
 NR_S18 = tregister($04090012);
 NR_D18 = tregister($040a0012);
 NR_Q18 = tregister($04050012);
+NR_V188B = tregister($04170012);
+NR_V1816B = tregister($04180012);
 NR_B19 = tregister($04010013);
 NR_H19 = tregister($04030013);
 NR_S19 = tregister($04090013);
 NR_D19 = tregister($040a0013);
 NR_Q19 = tregister($04050013);
+NR_V198B = tregister($04170013);
+NR_V1916B = tregister($04180013);
 NR_B20 = tregister($04010014);
 NR_H20 = tregister($04030014);
 NR_S20 = tregister($04090014);
 NR_D20 = tregister($040a0014);
 NR_Q20 = tregister($04050014);
+NR_V208B = tregister($04170014);
+NR_V2016B = tregister($04180014);
 NR_B21 = tregister($04010015);
 NR_H21 = tregister($04030015);
 NR_S21 = tregister($04090015);
 NR_D21 = tregister($040a0015);
 NR_Q21 = tregister($04050015);
+NR_V218B = tregister($04170015);
+NR_V2116B = tregister($04180015);
 NR_B22 = tregister($04010016);
 NR_H22 = tregister($04030016);
 NR_S22 = tregister($04090016);
 NR_D22 = tregister($040a0016);
 NR_Q22 = tregister($04050016);
+NR_V228B = tregister($04170016);
+NR_V2216B = tregister($04180016);
 NR_B23 = tregister($04010017);
 NR_H23 = tregister($04030017);
 NR_S23 = tregister($04090017);
 NR_D23 = tregister($040a0017);
 NR_Q23 = tregister($04050017);
+NR_V238B = tregister($04170017);
+NR_V2316B = tregister($04180017);
 NR_B24 = tregister($04010018);
 NR_H24 = tregister($04030018);
 NR_S24 = tregister($04090018);
 NR_D24 = tregister($040a0018);
 NR_Q24 = tregister($04050018);
+NR_V248B = tregister($04170018);
+NR_V2416B = tregister($04180018);
 NR_B25 = tregister($04010019);
 NR_H25 = tregister($04030019);
 NR_S25 = tregister($04090019);
 NR_D25 = tregister($040a0019);
 NR_Q25 = tregister($04050019);
+NR_V258B = tregister($04170019);
+NR_V2516B = tregister($04180019);
 NR_B26 = tregister($0401001A);
 NR_H26 = tregister($0403001A);
 NR_S26 = tregister($0409001A);
 NR_D26 = tregister($040a001A);
 NR_Q26 = tregister($0405001A);
+NR_V268B = tregister($0417001A);
+NR_V2616B = tregister($0418001A);
 NR_B27 = tregister($0401001B);
 NR_H27 = tregister($0403001B);
 NR_S27 = tregister($0409001B);
 NR_D27 = tregister($040a001B);
 NR_Q27 = tregister($0405001B);
+NR_V278B = tregister($0417001B);
+NR_V2716B = tregister($0418001B);
 NR_B28 = tregister($0401001C);
 NR_H28 = tregister($0403001C);
 NR_S28 = tregister($0409001C);
 NR_D28 = tregister($040a001C);
 NR_Q28 = tregister($0405001C);
+NR_V288B = tregister($0417001C);
+NR_V2816B = tregister($0418001C);
 NR_B29 = tregister($0401001D);
 NR_H29 = tregister($0403001D);
 NR_S29 = tregister($0409001D);
 NR_D29 = tregister($040a001D);
 NR_Q29 = tregister($0405001D);
+NR_V298B = tregister($0417001D);
+NR_V2916B = tregister($0418001D);
 NR_B30 = tregister($0401001E);
 NR_H30 = tregister($0403001E);
 NR_S30 = tregister($0409001E);
 NR_D30 = tregister($040a001E);
 NR_Q30 = tregister($0405001E);
+NR_V308B = tregister($0417001E);
+NR_V3016B = tregister($0418001E);
 NR_B31 = tregister($0401001F);
 NR_H31 = tregister($0403001F);
 NR_S31 = tregister($0409001F);
 NR_D31 = tregister($040a001F);
 NR_Q31 = tregister($0405001F);
+NR_V318B = tregister($0417001F);
+NR_V3116B = tregister($0418001F);
 NR_NZCV = tregister($05000000);
 NR_FPCR = tregister($05000001);
 NR_FPSR = tregister($05000002);

+ 64 - 0
compiler/aarch64/ra64dwa.inc

@@ -71,6 +71,10 @@
 64,
 64,
 64,
+64,
+64,
+65,
+65,
 65,
 65,
 65,
@@ -81,6 +85,10 @@
 66,
 66,
 66,
+66,
+66,
+67,
+67,
 67,
 67,
 67,
@@ -91,6 +99,10 @@
 68,
 68,
 68,
+68,
+68,
+69,
+69,
 69,
 69,
 69,
@@ -98,129 +110,181 @@
 69,
 70,
 70,
+70                                                             ,
 70,
 70,
 70,
+70,
+71,
 71,
 71,
 71,
 71,
 71,
+71,
+72,
 72,
 72,
 72,
 72,
 72,
+72,
+73,
 73,
 73,
 73,
 73,
 73,
+73,
+74,
 74,
 74,
 74,
 74,
 74,
+74,
+75,
 75,
 75,
 75,
 75,
 75,
+75,
+76,
 76,
 76,
 76,
 76,
 76,
+76,
+77,
 77,
 77,
 77,
 77,
 77,
+77,
+78,
 78,
 78,
 78,
 78,
 78,
+78,
+79,
 79,
 79,
 79,
 79,
 79,
+79,
+80,
 80,
 80,
 80,
 80,
 80,
+80,
+81,
 81,
 81,
 81,
 81,
 81,
+81,
+82,
 82,
 82,
 82,
 82,
 82,
+82,
+83,
 83,
 83,
 83,
 83,
 83,
+83,
+84,
 84,
 84,
 84,
 84,
 84,
+84,
+85,
 85,
 85,
 85,
 85,
 85,
+85,
+86,
 86,
 86,
 86,
 86,
 86,
+86,
+87,
 87,
 87,
 87,
 87,
 87,
+87,
+88,
 88,
 88,
 88,
 88,
 88,
+88,
+89,
 89,
 89,
 89,
 89,
 89,
+89,
+90,
 90,
 90,
 90,
 90,
 90,
+90,
+91,
 91,
 91,
 91,
 91,
 91,
+91,
+92,
 92,
 92,
 92,
 92,
 92,
+92,
+93,
 93,
 93,
 93,
 93,
 93,
+93,
+94,
 94,
 94,
 94,
 94,
 94,
+94,
+95,
+95,
 95,
 95,
 95,

+ 1 - 1
compiler/aarch64/ra64nor.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from a64reg.dat }
-231
+295

+ 64 - 0
compiler/aarch64/ra64num.inc

@@ -71,161 +71,225 @@ tregister($04030000),
 tregister($04090000),
 tregister($040a0000),
 tregister($04050000),
+tregister($04170000),
+tregister($04180000),
 tregister($04010001),
 tregister($04030001),
 tregister($04090001),
 tregister($040a0001),
 tregister($04050001),
+tregister($04170001),
+tregister($04180001),
 tregister($04010002),
 tregister($04030002),
 tregister($04090002),
 tregister($040a0002),
 tregister($04050002),
+tregister($04170002),
+tregister($04180002),
 tregister($04010003),
 tregister($04030003),
 tregister($04090003),
 tregister($040a0003),
 tregister($04050003),
+tregister($04170003),
+tregister($04180003),
 tregister($04010004),
 tregister($04030004),
 tregister($04090004),
 tregister($040a0004),
 tregister($04050004),
+tregister($04170004),
+tregister($04180004),
 tregister($04010005),
 tregister($04030005),
 tregister($04090005),
 tregister($040a0005),
 tregister($04050005),
+tregister($04170005),
+tregister($04180005),
 tregister($04010006),
 tregister($04030006),
 tregister($04090006),
 tregister($040a0006),
 tregister($04050006),
+tregister($04170006),
+tregister($04180006),
 tregister($04010007),
 tregister($04030007),
 tregister($04090007),
 tregister($040a0007),
 tregister($04050007),
+tregister($04170007),
+tregister($04180007),
 tregister($04010008),
 tregister($04030008),
 tregister($04090008),
 tregister($040a0008),
 tregister($04050008),
+tregister($04170008),
+tregister($04180008),
 tregister($04010009),
 tregister($04030009),
 tregister($04090009),
 tregister($040a0009),
 tregister($04050009),
+tregister($04170009),
+tregister($04180009),
 tregister($0401000A),
 tregister($0403000A),
 tregister($0409000A),
 tregister($040a000A),
 tregister($0405000A),
+tregister($0417000A),
+tregister($0418000A),
 tregister($0401000B),
 tregister($0403000B),
 tregister($0409000B),
 tregister($040a000B),
 tregister($0405000B),
+tregister($0417000B),
+tregister($0418000B),
 tregister($0401000C),
 tregister($0403000C),
 tregister($0409000C),
 tregister($040a000C),
 tregister($0405000C),
+tregister($0417000C),
+tregister($0418000C),
 tregister($0401000D),
 tregister($0403000D),
 tregister($0409000D),
 tregister($040a000D),
 tregister($0405000D),
+tregister($0417000D),
+tregister($0418000D),
 tregister($0401000E),
 tregister($0403000E),
 tregister($0409000E),
 tregister($040a000E),
 tregister($0405000E),
+tregister($0417000E),
+tregister($0418000E),
 tregister($0401000F),
 tregister($0403000F),
 tregister($0409000F),
 tregister($040a000F),
 tregister($0405000F),
+tregister($0417000F),
+tregister($0418000F),
 tregister($04010010),
 tregister($04030010),
 tregister($04090010),
 tregister($040a0010),
 tregister($04050010),
+tregister($04170010),
+tregister($04180010),
 tregister($04010011),
 tregister($04030011),
 tregister($04090011),
 tregister($040a0011),
 tregister($04050011),
+tregister($04170011),
+tregister($04180011),
 tregister($04010012),
 tregister($04030012),
 tregister($04090012),
 tregister($040a0012),
 tregister($04050012),
+tregister($04170012),
+tregister($04180012),
 tregister($04010013),
 tregister($04030013),
 tregister($04090013),
 tregister($040a0013),
 tregister($04050013),
+tregister($04170013),
+tregister($04180013),
 tregister($04010014),
 tregister($04030014),
 tregister($04090014),
 tregister($040a0014),
 tregister($04050014),
+tregister($04170014),
+tregister($04180014),
 tregister($04010015),
 tregister($04030015),
 tregister($04090015),
 tregister($040a0015),
 tregister($04050015),
+tregister($04170015),
+tregister($04180015),
 tregister($04010016),
 tregister($04030016),
 tregister($04090016),
 tregister($040a0016),
 tregister($04050016),
+tregister($04170016),
+tregister($04180016),
 tregister($04010017),
 tregister($04030017),
 tregister($04090017),
 tregister($040a0017),
 tregister($04050017),
+tregister($04170017),
+tregister($04180017),
 tregister($04010018),
 tregister($04030018),
 tregister($04090018),
 tregister($040a0018),
 tregister($04050018),
+tregister($04170018),
+tregister($04180018),
 tregister($04010019),
 tregister($04030019),
 tregister($04090019),
 tregister($040a0019),
 tregister($04050019),
+tregister($04170019),
+tregister($04180019),
 tregister($0401001A),
 tregister($0403001A),
 tregister($0409001A),
 tregister($040a001A),
 tregister($0405001A),
+tregister($0417001A),
+tregister($0418001A),
 tregister($0401001B),
 tregister($0403001B),
 tregister($0409001B),
 tregister($040a001B),
 tregister($0405001B),
+tregister($0417001B),
+tregister($0418001B),
 tregister($0401001C),
 tregister($0403001C),
 tregister($0409001C),
 tregister($040a001C),
 tregister($0405001C),
+tregister($0417001C),
+tregister($0418001C),
 tregister($0401001D),
 tregister($0403001D),
 tregister($0409001D),
 tregister($040a001D),
 tregister($0405001D),
+tregister($0417001D),
+tregister($0418001D),
 tregister($0401001E),
 tregister($0403001E),
 tregister($0409001E),
 tregister($040a001E),
 tregister($0405001E),
+tregister($0417001E),
+tregister($0418001E),
 tregister($0401001F),
 tregister($0403001F),
 tregister($0409001F),
 tregister($040a001F),
 tregister($0405001F),
+tregister($0417001F),
+tregister($0418001F),
 tregister($05000000),
 tregister($05000001),
 tregister($05000002),

+ 201 - 137
compiler/aarch64/ra64rni.inc

@@ -67,166 +67,230 @@
 64,
 66,
 67,
-72,
-77,
-82,
-87,
-92,
-97,
+74,
+81,
+88,
+95,
 102,
-107,
-112,
-117,
-122,
-127,
-132,
+109,
+116,
+123,
+130,
 137,
-142,
-147,
-152,
-157,
-162,
-167,
+144,
+151,
+158,
+165,
 172,
-177,
-182,
-187,
-192,
-197,
-202,
+179,
+186,
+193,
+200,
 207,
-212,
-217,
-222,
+214,
+221,
+228,
+235,
+242,
+249,
+256,
+263,
+270,
+277,
+284,
 68,
-73,
-78,
-83,
-88,
-93,
-98,
+75,
+82,
+89,
+96,
 103,
-108,
-113,
-118,
-123,
-128,
-133,
+110,
+117,
+124,
+131,
 138,
-143,
-148,
-153,
-158,
-163,
-168,
+145,
+152,
+159,
+166,
 173,
-178,
-183,
-188,
-193,
-198,
-203,
+180,
+187,
+194,
+201,
 208,
-213,
-218,
-223,
+215,
+222,
+229,
+236,
+243,
+250,
+257,
+264,
+271,
+278,
+285,
 71,
-76,
-81,
-86,
-91,
-96,
-101,
+78,
+85,
+92,
+99,
 106,
-111,
-116,
-121,
-126,
-131,
-136,
+113,
+120,
+127,
+134,
 141,
-146,
-151,
-156,
-161,
-166,
-171,
+148,
+155,
+162,
+169,
 176,
-181,
-186,
-191,
-196,
-201,
-206,
+183,
+190,
+197,
+204,
 211,
-216,
-221,
-226,
+218,
+225,
+232,
+239,
+246,
+253,
+260,
+267,
+274,
+281,
+288,
 69,
-74,
-79,
-84,
-89,
-94,
-99,
+76,
+83,
+90,
+97,
 104,
-109,
-114,
-119,
-124,
-129,
-134,
+111,
+118,
+125,
+132,
 139,
-144,
-149,
-154,
-159,
-164,
-169,
+146,
+153,
+160,
+167,
 174,
-179,
-184,
-189,
-194,
-199,
-204,
+181,
+188,
+195,
+202,
 209,
-214,
-219,
-224,
+216,
+223,
+230,
+237,
+244,
+251,
+258,
+265,
+272,
+279,
+286,
 70,
-75,
-80,
-85,
-90,
-95,
-100,
+77,
+84,
+91,
+98,
 105,
-110,
-115,
-120,
-125,
-130,
-135,
+112,
+119,
+126,
+133,
 140,
-145,
-150,
-155,
-160,
-165,
-170,
+147,
+154,
+161,
+168,
 175,
-180,
-185,
-190,
-195,
-200,
-205,
+182,
+189,
+196,
+203,
 210,
-215,
+217,
+224,
+231,
+238,
+245,
+252,
+259,
+266,
+273,
+280,
+287,
+72,
+79,
+86,
+93,
+100,
+107,
+114,
+121,
+128,
+135,
+142,
+149,
+156,
+163,
+170,
+177,
+184,
+191,
+198,
+205,
+212,
+219,
+226,
+233,
+240,
+247,
+254,
+261,
+268,
+275,
+282,
+289,
+73,
+80,
+87,
+94,
+101,
+108,
+115,
+122,
+129,
+136,
+143,
+150,
+157,
+164,
+171,
+178,
+185,
+192,
+199,
+206,
+213,
 220,
-225,
 227,
-228,
-229,
-230
+234,
+241,
+248,
+255,
+262,
+269,
+276,
+283,
+290,
+291,
+292,
+293,
+294

+ 200 - 136
compiler/aarch64/ra64sri.inc

@@ -1,170 +1,234 @@
 { don't edit, this file is generated from a64reg.dat }
 0,
 67,
-72,
-117,
-122,
-127,
-132,
+74,
 137,
-142,
-147,
-152,
-157,
-162,
-77,
-167,
+144,
+151,
+158,
+165,
 172,
-177,
-182,
-187,
-192,
-197,
-202,
+179,
+186,
+193,
+200,
+81,
 207,
-212,
-82,
-217,
-222,
-87,
-92,
-97,
+214,
+221,
+228,
+235,
+242,
+249,
+256,
+263,
+270,
+88,
+277,
+284,
+95,
 102,
-107,
-112,
-70,
-75,
-120,
-125,
+109,
+116,
+123,
 130,
-135,
+70,
+77,
 140,
-145,
-150,
-155,
-160,
-165,
-80,
-170,
+147,
+154,
+161,
+168,
 175,
-180,
-185,
-190,
-195,
-200,
-205,
+182,
+189,
+196,
+203,
+84,
 210,
-215,
-85,
-220,
-225,
-90,
-95,
-100,
+217,
+224,
+231,
+238,
+245,
+252,
+259,
+266,
+273,
+91,
+280,
+287,
+98,
 105,
-110,
-115,
-228,
-229,
-68,
-73,
-118,
-123,
-128,
+112,
+119,
+126,
 133,
+292,
+293,
+68,
+75,
 138,
-143,
-148,
-153,
-158,
-163,
-78,
-168,
+145,
+152,
+159,
+166,
 173,
-178,
-183,
-188,
-193,
-198,
-203,
+180,
+187,
+194,
+201,
+82,
 208,
-213,
-83,
-218,
-223,
-88,
-93,
-98,
+215,
+222,
+229,
+236,
+243,
+250,
+257,
+264,
+271,
+89,
+278,
+285,
+96,
 103,
-108,
-113,
-227,
-71,
-76,
-121,
-126,
+110,
+117,
+124,
 131,
-136,
+291,
+71,
+78,
 141,
-146,
-151,
-156,
-161,
-166,
-81,
-171,
+148,
+155,
+162,
+169,
 176,
-181,
-186,
-191,
-196,
-201,
-206,
+183,
+190,
+197,
+204,
+85,
 211,
-216,
-86,
-221,
-226,
-91,
-96,
-101,
+218,
+225,
+232,
+239,
+246,
+253,
+260,
+267,
+274,
+92,
+281,
+288,
+99,
 106,
-111,
-116,
-69,
-74,
-119,
-124,
-129,
+113,
+120,
+127,
 134,
+69,
+76,
 139,
-144,
+146,
+153,
+160,
+167,
+174,
+181,
+188,
+195,
+202,
+83,
+209,
+216,
+223,
+230,
+237,
+244,
+251,
+258,
+265,
+272,
+90,
+279,
+286,
+97,
+104,
+111,
+118,
+125,
+132,
+66,
+294,
+73,
+72,
+80,
+79,
+143,
+142,
+150,
 149,
-154,
-159,
+157,
+156,
 164,
-79,
-169,
-174,
-179,
+163,
+171,
+170,
+178,
+177,
+185,
 184,
-189,
-194,
+192,
+191,
 199,
-204,
-209,
-214,
-84,
+198,
+206,
+205,
+87,
+86,
+213,
+212,
+220,
 219,
-224,
-89,
+227,
+226,
+234,
+233,
+241,
+240,
+248,
+247,
+255,
+254,
+262,
+261,
+269,
+268,
+276,
+275,
 94,
-99,
-104,
-109,
+93,
+283,
+282,
+290,
+289,
+101,
+100,
+108,
+107,
+115,
 114,
-66,
-230,
+122,
+121,
+129,
+128,
+136,
+135,
 1,
 3,
 21,

+ 64 - 0
compiler/aarch64/ra64sta.inc

@@ -71,6 +71,10 @@
 64,
 64,
 64,
+64,
+64,
+65,
+65,
 65,
 65,
 65,
@@ -81,6 +85,10 @@
 66,
 66,
 66,
+66,
+66,
+67,
+67,
 67,
 67,
 67,
@@ -91,6 +99,10 @@
 68,
 68,
 68,
+68,
+68,
+69,
+69,
 69,
 69,
 69,
@@ -101,6 +113,10 @@
 70,
 70,
 70,
+70,
+70,
+71,
+71,
 71,
 71,
 71,
@@ -111,6 +127,10 @@
 72,
 72,
 72,
+72,
+72,
+73,
+73,
 73,
 73,
 73,
@@ -121,6 +141,10 @@
 74,
 74,
 74,
+74,
+74,
+75,
+75,
 75,
 75,
 75,
@@ -131,6 +155,10 @@
 76,
 76,
 76,
+76,
+76,
+77,
+77,
 77,
 77,
 77,
@@ -141,6 +169,10 @@
 78,
 78,
 78,
+78,
+78,
+79,
+79,
 79,
 79,
 79,
@@ -151,6 +183,10 @@
 80,
 80,
 80,
+80,
+80,
+81,
+81,
 81,
 81,
 81,
@@ -161,6 +197,10 @@
 82,
 82,
 82,
+82,
+82,
+83,
+83,
 83,
 83,
 83,
@@ -171,6 +211,10 @@
 84,
 84,
 84,
+84,
+84,
+85,
+85,
 85,
 85,
 85,
@@ -181,6 +225,10 @@
 86,
 86,
 86,
+86,
+86,
+87,
+87,
 87,
 87,
 87,
@@ -191,6 +239,10 @@
 88,
 88,
 88,
+88,
+88,
+89,
+89,
 89,
 89,
 89,
@@ -201,6 +253,10 @@
 90,
 90,
 90,
+90,
+90,
+91,
+91,
 91,
 91,
 91,
@@ -211,6 +267,10 @@
 92,
 92,
 92,
+92,
+92,
+93,
+93,
 93,
 93,
 93,
@@ -221,6 +281,10 @@
 94,
 94,
 94,
+94,
+94,
+95,
+95,
 95,
 95,
 95,

+ 64 - 0
compiler/aarch64/ra64std.inc

@@ -71,161 +71,225 @@
 's0',
 'd0',
 'q0',
+'v0.8b',
+'v0.16b',
 'b1',
 'h1',
 's1',
 'd1',
 'q1',
+'v1.8b',
+'v1.16b',
 'b2',
 'h2',
 's2',
 'd2',
 'q2',
+'v2.8b',
+'v2.16b',
 'b3',
 'h3',
 's3',
 'd3',
 'q3',
+'v3.8b',
+'v3.16b',
 'b4',
 'h4',
 's4',
 'd4',
 'q4',
+'v4.8b',
+'v4.16b',
 'b5',
 'h5',
 's5',
 'd5',
 'q5',
+'v5.8b',
+'v5.16b',
 'b6',
 'h6',
 's6',
 'd6',
 'q6',
+'v6.8b',
+'v6.16b',
 'b7',
 'h7',
 's7',
 'd7',
 'q7',
+'v7.8b',
+'v7.16b',
 'b8',
 'h8',
 's8',
 'd8',
 'q8',
+'v8.8b',
+'v8.16b',
 'b9',
 'h9',
 's9',
 'd9',
 'q9',
+'v9.8b',
+'v9.16b',
 'b10',
 'h10',
 's10',
 'd10',
 'q10',
+'v10.8b',
+'v10.16b',
 'b11',
 'h11',
 's11',
 'd11',
 'q11',
+'v11.8b',
+'v11.16b',
 'b12',
 'h12',
 's12',
 'd12',
 'q12',
+'v12.8b',
+'v12.16b',
 'b13',
 'h13',
 's13',
 'd13',
 'q13',
+'v13.8b',
+'v13.16b',
 'b14',
 'h14',
 's14',
 'd14',
 'q14',
+'v14.8b',
+'v14.16b',
 'b15',
 'h15',
 's15',
 'd15',
 'q15',
+'v15.8b',
+'v15.16b',
 'b16',
 'h16',
 's16',
 'd16',
 'q16',
+'v16.8b',
+'v16.16b',
 'b17',
 'h17',
 's17',
 'd17',
 'q17',
+'v17.8b',
+'v17.16b',
 'b18',
 'h18',
 's18',
 'd18',
 'q18',
+'v18.8b',
+'v18.16b',
 'b19',
 'h19',
 's19',
 'd19',
 'q19',
+'v19.8b',
+'v19.16b',
 'b20',
 'h20',
 's20',
 'd20',
 'q20',
+'v20.8b',
+'v20.16b',
 'b21',
 'h21',
 's21',
 'd21',
 'q21',
+'v21.8b',
+'v21.16b',
 'b22',
 'h22',
 's22',
 'd22',
 'q22',
+'v22.8b',
+'v22.16b',
 'b23',
 'h23',
 's23',
 'd23',
 'q23',
+'v23.8b',
+'v23.16b',
 'b24',
 'h24',
 's24',
 'd24',
 'q24',
+'v24.8b',
+'v24.16b',
 'b25',
 'h25',
 's25',
 'd25',
 'q25',
+'v25.8b',
+'v25.16b',
 'b26',
 'h26',
 's26',
 'd26',
 'q26',
+'v26.8b',
+'v26.16b',
 'b27',
 'h27',
 's27',
 'd27',
 'q27',
+'v27.8b',
+'v27.16b',
 'b28',
 'h28',
 's28',
 'd28',
 'q28',
+'v28.8b',
+'v28.16b',
 'b29',
 'h29',
 's29',
 'd29',
 'q29',
+'v29.8b',
+'v29.16b',
 'b30',
 'h30',
 's30',
 'd30',
 'q30',
+'v30.8b',
+'v30.16b',
 'b31',
 'h31',
 's31',
 'd31',
 'q31',
+'v31.8b',
+'v31.16b',
 'nzcv',
 'fpcr',
 'fpsr',

+ 64 - 0
compiler/aarch64/ra64sup.inc

@@ -71,161 +71,225 @@ RS_H0 = $00;
 RS_S0 = $00;
 RS_D0 = $00;
 RS_Q0 = $00;
+RS_V08B = $00;
+RS_V016B = $00;
 RS_B1 = $01;
 RS_H1 = $01;
 RS_S1 = $01;
 RS_D1 = $01;
 RS_Q1 = $01;
+RS_V18B = $01;
+RS_V116B = $01;
 RS_B2 = $02;
 RS_H2 = $02;
 RS_S2 = $02;
 RS_D2 = $02;
 RS_Q2 = $02;
+RS_V28B = $02;
+RS_V216B = $02;
 RS_B3 = $03;
 RS_H3 = $03;
 RS_S3 = $03;
 RS_D3 = $03;
 RS_Q3 = $03;
+RS_V38B = $03;
+RS_V316B = $03;
 RS_B4 = $04;
 RS_H4 = $04;
 RS_S4 = $04;
 RS_D4 = $04;
 RS_Q4 = $04;
+RS_V48B = $04;
+RS_V416B = $04;
 RS_B5 = $05;
 RS_H5 = $05;
 RS_S5 = $05;
 RS_D5 = $05;
 RS_Q5 = $05;
+RS_V58B = $05;
+RS_V516B = $05;
 RS_B6 = $06;
 RS_H6 = $06;
 RS_S6 = $06;
 RS_D6 = $06;
 RS_Q6 = $06;
+RS_V68B = $06;
+RS_V616B = $06;
 RS_B7 = $07;
 RS_H7 = $07;
 RS_S7 = $07;
 RS_D7 = $07;
 RS_Q7 = $07;
+RS_V78B = $07;
+RS_V716B = $07;
 RS_B8 = $08;
 RS_H8 = $08;
 RS_S8 = $08;
 RS_D8 = $08;
 RS_Q8 = $08;
+RS_V88B = $08;
+RS_V816B = $08;
 RS_B9 = $09;
 RS_H9 = $09;
 RS_S9 = $09;
 RS_D9 = $09;
 RS_Q9 = $09;
+RS_V98B = $09;
+RS_V916B = $09;
 RS_B10 = $0A;
 RS_H10 = $0A;
 RS_S10 = $0A;
 RS_D10 = $0A;
 RS_Q10 = $0A;
+RS_V108B = $0A;
+RS_V1016B = $0A;
 RS_B11 = $0B;
 RS_H11 = $0B;
 RS_S11 = $0B;
 RS_D11 = $0B;
 RS_Q11 = $0B;
+RS_V118B = $0B;
+RS_V1116B = $0B;
 RS_B12 = $0C;
 RS_H12 = $0C;
 RS_S12 = $0C;
 RS_D12 = $0C;
 RS_Q12 = $0C;
+RS_V128B = $0C;
+RS_V1216B = $0C;
 RS_B13 = $0D;
 RS_H13 = $0D;
 RS_S13 = $0D;
 RS_D13 = $0D;
 RS_Q13 = $0D;
+RS_V138B = $0D;
+RS_V1316B = $0D;
 RS_B14 = $0E;
 RS_H14 = $0E;
 RS_S14 = $0E;
 RS_D14 = $0E;
 RS_Q14 = $0E;
+RS_V148B = $0E;
+RS_V1416B = $0E;
 RS_B15 = $0F;
 RS_H15 = $0F;
 RS_S15 = $0F;
 RS_D15 = $0F;
 RS_Q15 = $0F;
+RS_V158B = $0F;
+RS_V1516B = $0F;
 RS_B16 = $10;
 RS_H16 = $10;
 RS_S16 = $10;
 RS_D16 = $10;
 RS_Q16 = $10;
+RS_V168B = $10;
+RS_V1616B = $10;
 RS_B17 = $11;
 RS_H17 = $11;
 RS_S17 = $11;
 RS_D17 = $11;
 RS_Q17 = $11;
+RS_V178B = $11;
+RS_V1716B = $11;
 RS_B18 = $12;
 RS_H18 = $12;
 RS_S18 = $12;
 RS_D18 = $12;
 RS_Q18 = $12;
+RS_V188B = $12;
+RS_V1816B = $12;
 RS_B19 = $13;
 RS_H19 = $13;
 RS_S19 = $13;
 RS_D19 = $13;
 RS_Q19 = $13;
+RS_V198B = $13;
+RS_V1916B = $13;
 RS_B20 = $14;
 RS_H20 = $14;
 RS_S20 = $14;
 RS_D20 = $14;
 RS_Q20 = $14;
+RS_V208B = $14;
+RS_V2016B = $14;
 RS_B21 = $15;
 RS_H21 = $15;
 RS_S21 = $15;
 RS_D21 = $15;
 RS_Q21 = $15;
+RS_V218B = $15;
+RS_V2116B = $15;
 RS_B22 = $16;
 RS_H22 = $16;
 RS_S22 = $16;
 RS_D22 = $16;
 RS_Q22 = $16;
+RS_V228B = $16;
+RS_V2216B = $16;
 RS_B23 = $17;
 RS_H23 = $17;
 RS_S23 = $17;
 RS_D23 = $17;
 RS_Q23 = $17;
+RS_V238B = $17;
+RS_V2316B = $17;
 RS_B24 = $18;
 RS_H24 = $18;
 RS_S24 = $18;
 RS_D24 = $18;
 RS_Q24 = $18;
+RS_V248B = $18;
+RS_V2416B = $18;
 RS_B25 = $19;
 RS_H25 = $19;
 RS_S25 = $19;
 RS_D25 = $19;
 RS_Q25 = $19;
+RS_V258B = $19;
+RS_V2516B = $19;
 RS_B26 = $1A;
 RS_H26 = $1A;
 RS_S26 = $1A;
 RS_D26 = $1A;
 RS_Q26 = $1A;
+RS_V268B = $1A;
+RS_V2616B = $1A;
 RS_B27 = $1B;
 RS_H27 = $1B;
 RS_S27 = $1B;
 RS_D27 = $1B;
 RS_Q27 = $1B;
+RS_V278B = $1B;
+RS_V2716B = $1B;
 RS_B28 = $1C;
 RS_H28 = $1C;
 RS_S28 = $1C;
 RS_D28 = $1C;
 RS_Q28 = $1C;
+RS_V288B = $1C;
+RS_V2816B = $1C;
 RS_B29 = $1D;
 RS_H29 = $1D;
 RS_S29 = $1D;
 RS_D29 = $1D;
 RS_Q29 = $1D;
+RS_V298B = $1D;
+RS_V2916B = $1D;
 RS_B30 = $1E;
 RS_H30 = $1E;
 RS_S30 = $1E;
 RS_D30 = $1E;
 RS_Q30 = $1E;
+RS_V308B = $1E;
+RS_V3016B = $1E;
 RS_B31 = $1F;
 RS_H31 = $1F;
 RS_S31 = $1F;
 RS_D31 = $1F;
 RS_Q31 = $1F;
+RS_V318B = $1F;
+RS_V3116B = $1F;
 RS_NZCV = $00;
 RS_FPCR = $01;
 RS_FPSR = $02;

+ 3 - 0
compiler/aarch64/racpu.pas

@@ -73,6 +73,7 @@ unit racpu;
         { a 32 bit integer register could actually be 16 or 8 bit }
         if result=OS_32 then
           case oppostfix of
+            PF_NONE: ;
             PF_B:
               result:=OS_8;
             PF_SB:
@@ -81,6 +82,8 @@ unit racpu;
               result:=OS_16;
             PF_SH:
               result:=OS_S16;
+            else
+              Message(asmr_e_invalid_opcode_and_operand)
           end;
       end;
 

+ 15 - 12
compiler/aarch64/racpugas.pas

@@ -55,10 +55,10 @@ Unit racpugas;
       globtype,verbose,
       systems,aasmbase,aasmtai,aasmdata,aasmcpu,
       { symtable }
-      symconst,symsym,
+      symconst,symsym,symdef,
       procinfo,
       rabase,rautils,
-      cgbase,cgutils;
+      cgbase,cgutils,paramgr;
 
 
     function taarch64attreader.is_register(const s:string):boolean;
@@ -461,7 +461,7 @@ Unit racpugas;
 
       const
         shiftmode2str: array[SM_LSL..SM_SXTX] of string[4] =
-          ('LSL','LSR','ASR',
+          ('LSL','LSR','ASR','ROR',
            'UXTB','UXTH','UXTW','UXTX',
            'SXTB','SXTH','SXTW','SXTX');
       var
@@ -485,8 +485,8 @@ Unit racpugas;
                       useszr:=false;
                       for i:=low(instr.operands) to pred(opnr) do
                         begin
-                          if (instr.operands[1].opr.typ=OPR_REGISTER) then
-                            case getsupreg(instr.operands[1].opr.reg) of
+                          if (instr.operands[i].opr.typ=OPR_REGISTER) then
+                            case getsupreg(instr.operands[i].opr.reg) of
                               RS_XZR:
                                 useszr:=true;
                               RS_SP:
@@ -494,7 +494,10 @@ Unit racpugas;
                             end;
                         end;
                       result:=valid_shifter_operand(instr.opcode,useszr,usessp,instr.Is64bit,sm,instr.operands[opnr].opr.shifterop.shiftimm);
-                    end
+                      if result then
+                        instr.Ops:=opnr;
+                    end;
+                  break;
                 end;
           end;
       end;
@@ -520,6 +523,8 @@ Unit racpugas;
                     end;
                 end;
             end;
+          else
+            ;
         end;
         result:=C_None;;
       end;
@@ -560,7 +565,8 @@ Unit racpugas;
                oper.opr.symbol:=hl;
              end
             else if (actopcode=A_ADR) or
-               (actopcode=A_ADRP) then
+               (actopcode=A_ADRP) or
+               (actopcode=A_LDR) then
               begin
                 oper.InitRef;
                 MaybeAddGotAddrMode;
@@ -607,10 +613,8 @@ Unit racpugas;
                   { don't allow direct access to fields of parameters, because that
                     will generate buggy code. Allow it only for explicit typecasting }
                   if hasdot and
-                     (not oper.hastype) and
-                     (tabstractnormalvarsym(oper.opr.localsym).owner.symtabletype=parasymtable) and
-                     (current_procinfo.procdef.proccalloption<>pocall_register) then
-                    Message(asmr_e_cannot_access_field_directly_for_parameters);
+                     (not oper.hastype) then
+                    checklocalsubscript(oper.opr.localsym);
                   inc(oper.opr.localsymofs,l)
                 end;
               OPR_CONSTANT :
@@ -935,7 +939,6 @@ Unit racpugas;
         j  : longint;
         hs : string;
         maxlen : longint;
-        icond : tasmcond;
       Begin
         { making s a value parameter would break other assembler readers }
         hs:=s;

+ 5 - 1
compiler/aarch64/rgcpu.pas

@@ -83,7 +83,7 @@ implementation
               hreg:=cg.getaddressregister(helplist);
 
             cg.a_load_const_reg(helplist,OS_ADDR,spilltemp.offset,hreg);
-            reference_reset_base(tmpref,spilltemp.base,0,sizeof(pint),[]);
+            reference_reset_base(tmpref,spilltemp.base,0,spilltemp.temppos,sizeof(pint),[]);
             tmpref.index:=hreg;
             if isload then
               helpins:=spilling_create_load(tmpref,tempreg)
@@ -140,6 +140,8 @@ implementation
                { ok in immediate form }
                if taicpu(p).oper[taicpu(p).ops-1]^.typ=top_const then
                  exit;
+             else
+               ;
            end;
            { add interferences for other registers }
            for i:=0 to taicpu(p).ops-1 do
@@ -163,6 +165,8 @@ implementation
                              add_edge(getsupreg(taicpu(p).oper[j]^.reg),getsupreg(taicpu(p).oper[i]^.ref^.base));
                        end;
                    end;
+                 else
+                   ;
                end;
              end;
          end;

+ 16 - 113
compiler/aasmbase.pas

@@ -39,8 +39,10 @@ interface
     type
        TAsmsymbind=(
          AB_NONE,AB_EXTERNAL,AB_COMMON,AB_LOCAL,AB_GLOBAL,AB_WEAK_EXTERNAL,
-         { global in the current program/library, but not visible outside it }
-         AB_PRIVATE_EXTERN,AB_LAZY,AB_IMPORT,
+         { global in the current program/library, but not visible outside it
+           (= "hidden" in ELF) }
+         AB_PRIVATE_EXTERN,
+         AB_LAZY,AB_IMPORT,
          { a symbol that's internal to the compiler and used as a temp }
          AB_TEMP,
          { a global symbol that points to another global symbol and is only used
@@ -74,10 +76,10 @@ interface
        { is the label only there for getting an DataOffset (e.g. for i/o
          checks -> alt_addr) or is it a jump target (alt_jump), for debug
          info alt_dbgline and alt_dbgfile, etc. }
-       TAsmLabelType = (alt_jump,alt_addr,alt_data,alt_dbgline,alt_dbgfile,alt_dbgtype,alt_dbgframe);
+       TAsmLabelType = (alt_jump,alt_addr,alt_data,alt_dbgline,alt_dbgfile,alt_dbgtype,alt_dbgframe,alt_eh_begin,alt_eh_end);
 
     const
-       asmlabeltypeprefix : array[TAsmLabeltype] of char = ('j','a','d','l','f','t','c');
+       asmlabeltypeprefix : array[TAsmLabeltype] of string[2] = ('j','a','d','l','f','t','c','eb','ee');
        asmsymbindname : array[TAsmsymbind] of string[23] = ('none', 'external','common',
        'local','global','weak external','private external','lazy','import','internal temp',
        'indirect','external indirect');
@@ -166,13 +168,20 @@ interface
          { stack segment for 16-bit DOS }
          sec_stack,
          { initial heap segment for 16-bit DOS }
-         sec_heap
+         sec_heap,
+         { dwarf based/gcc style exception handling }
+         sec_gcc_except_table,
+         sec_arm_attribute
        );
 
        TObjCAsmSectionType = sec_objc_class..sec_objc_protolist;
 
        TAsmSectionOrder = (secorder_begin,secorder_default,secorder_end);
 
+       TSectionFlag = (SF_A,SF_W,SF_X);
+       TSectionFlags = set of TSectionFlag;
+       TSectionProgbits = (SPB_None,SPB_PROGBITS,SPB_NOBITS,SPB_NOTE,SPB_ARM_ATTRIBUTES);
+
        TAsmSymbol = class(TFPHashObject)
        private
          { this need to be incremented with every symbol loading into the
@@ -219,6 +228,8 @@ interface
          labelnr   : longint;
          labeltype : TAsmLabelType;
          is_set    : boolean;
+         is_public : boolean;
+         defined_in_asmstatement : boolean;
          constructor Createlocal(AList: TFPHashObjectList; nr: longint; ltyp: TAsmLabelType);
          constructor Createstatic(AList: TFPHashObjectList; nr: longint; ltyp: TAsmLabelType);
          constructor Createglobal(AList: TFPHashObjectList; const modulename: TSymStr; nr: longint; ltyp: TAsmLabelType);
@@ -229,11 +240,6 @@ interface
     function create_smartlink_library:boolean;inline;
     function create_smartlink:boolean;inline;
 
-    function LengthUleb128(a: qword) : byte;
-    function LengthSleb128(a: int64) : byte;
-    function EncodeUleb128(a: qword;out buf) : byte;
-    function EncodeSleb128(a: int64;out buf) : byte;
-
     function ReplaceForbiddenAsmSymbolChars(const s: ansistring): ansistring;
 
     { dummy default noop callback }
@@ -282,109 +288,6 @@ implementation
       end;
 
 
-    function LengthUleb128(a: qword) : byte;
-      begin
-        result:=0;
-        repeat
-          a := a shr 7;
-          inc(result);
-          if a=0 then
-            break;
-        until false;
-      end;
-
-
-    function LengthSleb128(a: int64) : byte;
-      var
-        b, size: byte;
-        asign : int64;
-        neg, more: boolean;
-      begin
-        more := true;
-        neg := a < 0;
-        size := sizeof(a)*8;
-        result:=0;
-        repeat
-          b := a and $7f;
-          a := a shr 7;
-          if neg then
-            begin
-              { Use a variable to be sure that the correct or mask is generated }
-              asign:=1;
-              asign:=asign shl (size - 7);
-              a := a or -asign;
-            end;
-          if (((a = 0) and
-               (b and $40 = 0)) or
-              ((a = -1) and
-               (b and $40 <> 0))) then
-            more := false;
-          inc(result);
-          if not(more) then
-            break;
-        until false;
-      end;
-
-
-    function EncodeUleb128(a: qword;out buf) : byte;
-      var
-        b: byte;
-        pbuf : pbyte;
-      begin
-        result:=0;
-        pbuf:=@buf;
-        repeat
-          b := a and $7f;
-          a := a shr 7;
-          if a<>0 then
-            b := b or $80;
-          pbuf^:=b;
-          inc(pbuf);
-          inc(result);
-          if a=0 then
-            break;
-        until false;
-      end;
-
-
-    function EncodeSleb128(a: int64;out buf) : byte;
-      var
-        b, size: byte;
-        asign : int64;
-        neg, more: boolean;
-        pbuf : pbyte;
-      begin
-        more := true;
-        neg := a < 0;
-        size := sizeof(a)*8;
-        result:=0;
-        pbuf:=@buf;
-        repeat
-          b := a and $7f;
-          a := a shr 7;
-          if neg then
-            begin
-              { Use a variable to be sure that the correct or mask is generated }
-              asign:=1;
-              asign:=asign shl (size - 7);
-              a := a or -asign;
-            end;
-          if (((a = 0) and
-               (b and $40 = 0)) or
-              ((a = -1) and
-               (b and $40 <> 0))) then
-            more := false
-          else
-            b := b or $80;
-          pbuf^:=b;
-          inc(pbuf);
-          inc(result);
-          if not(more) then
-            break;
-        until false;
-      end;
-
-
     function ReplaceForbiddenAsmSymbolChars(const s: ansistring): ansistring;
       var
         i : longint;

+ 198 - 0
compiler/aasmcfi.pas

@@ -0,0 +1,198 @@
+{
+    Copyright (c) 2019 by Jonas Maebe, member of the
+    Free Pascal Compiler development team
+
+    Dwarf Call Frame Information directives
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit aasmcfi;
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+      globtype,
+      cgbase,
+      aasmtai;
+
+    type
+      tcfikind =
+        (cfi_startproc,
+         cfi_endproc,
+         cfi_personality,
+         cfi_personality_id,
+         cfi_fde_data,
+         cfi_lsda_encoding,
+         cfi_inline_lsda,
+         cfi_def_cfa,
+         cfi_def_cfa_register,
+         cfi_def_cfa_offset,
+         cfi_adjust_cfa_offset,
+         cfi_offset,
+         cfi_val_offset,
+         cfi_rel_offset,
+         cfi_register,
+         cfi_restore,
+         cfi_undefined,
+         cfi_same_value,
+         cfi_remember_state,
+         cfi_restore_state,
+         cfi_return_column,
+         cfi_signal_frame,
+         cfi_window_save,
+         cfi_escape,
+         cfi_val_encoded_addr
+        );
+
+{$push}
+{$j-}
+      const
+        cfi2str: array[tcfikind] of string[length('.cfi_adjust_cfa_offset')] =
+          ('.cfi_startproc',
+           '.cfi_endproc',
+           '.cfi_personality',
+           '.cfi_personality_id',
+           '.cfi_fde_data',
+           '.cfi_lsda_encoding',
+           '.cfi_inline_lsda',
+           '.cfi_def_cfa',
+           '.cfi_def_cfa_register',
+           '.cfi_def_cfa_offset',
+           '.cfi_adjust_cfa_offset',
+           '.cfi_offset',
+           '.cfi_val_offset',
+           '.cfi_rel_offset',
+           '.cfi_register',
+           '.cfi_restore',
+           '.cfi_undefined',
+           '.cfi_same_value',
+           '.cfi_remember_state',
+           '.cfi_restore_state',
+           '.cfi_return_column',
+           '.cfi_signal_frame',
+           '.cfi_window_save',
+           '.cfi_escape',
+           '.cfi_val_encoded_addr'
+          );
+{$pop}
+
+    type
+      tai_cfi_base = class abstract(tai)
+        cfityp: tcfikind;
+        constructor create(ctyp: tcfikind);
+      end;
+
+      tai_cfi_op_none = class(tai_cfi_base)
+      end;
+
+      tai_cfi_op_val = class(tai_cfi_base)
+        val1: aint;
+        constructor create(ctyp: tcfikind; const a: aint);
+      end;
+
+      tai_cfi_op_string = class(tai_cfi_base)
+        s1: TSymStr;
+        constructor create(ctyp: tcfikind; const str1: TSymStr);
+      end;
+
+      tai_cfi_op_val_string = class(tai_cfi_op_val)
+        s: TSymStr;
+        constructor create(ctyp: tcfikind; const a: aint; const str: TSymStr);
+      end;
+
+      tai_cfi_op_string_string = class(tai_cfi_op_string)
+        s2: TSymStr;
+        constructor create(ctyp: tcfikind; const str1, str2: TSymStr);
+      end;
+
+      tai_cfi_op_reg = class(tai_cfi_base)
+        reg1: tregister;
+        constructor create(ctyp: tcfikind; r: tregister);
+      end;
+
+      tai_cfi_op_reg_val = class(tai_cfi_op_reg)
+        val: aint;
+        constructor create(ctyp: tcfikind; r: tregister; a: aint);
+      end;
+
+      tai_cfi_op_reg_reg = class(tai_cfi_op_reg)
+        reg2: tregister;
+        constructor create(ctyp: tcfikind; r1, r2: tregister);
+      end;
+
+
+  implementation
+
+    constructor tai_cfi_base.create(ctyp: tcfikind);
+      begin
+        typ:=ait_cfi;
+        cfityp:=ctyp;
+      end;
+
+
+    constructor tai_cfi_op_val.create(ctyp: tcfikind; const a: aint);
+      begin
+        inherited create(ctyp);
+        val1:=a;
+      end;
+
+
+    constructor tai_cfi_op_string.create(ctyp: tcfikind; const str1: TSymStr);
+      begin
+        inherited create(ctyp);
+        s1:=str1;
+      end;
+
+
+    constructor tai_cfi_op_val_string.create(ctyp: tcfikind; const a: aint; const str: TSymStr);
+      begin
+        inherited create(ctyp,a);
+        s:=str;
+      end;
+
+
+    constructor tai_cfi_op_string_string.create(ctyp: tcfikind; const str1, str2: TSymStr);
+      begin
+        inherited create(ctyp,str1);
+        s2:=str2;
+      end;
+
+
+    constructor tai_cfi_op_reg.create(ctyp: tcfikind; r: tregister);
+      begin
+        inherited create(ctyp);
+        reg1:=r;
+      end;
+
+
+    constructor tai_cfi_op_reg_val.create(ctyp: tcfikind; r: tregister; a: aint);
+      begin
+        inherited create(ctyp,r);
+        val:=a;
+      end;
+
+
+    constructor tai_cfi_op_reg_reg.create(ctyp: tcfikind; r1, r2: tregister);
+      begin
+        inherited create(ctyp,r1);
+        reg2:=r2;
+      end;
+
+end.
+

+ 199 - 40
compiler/aasmcnst.pas

@@ -52,12 +52,13 @@ type
 
    { a simple data element; the value is stored as a tai }
    tai_simpletypedconst = class(tai_abstracttypedconst)
-   private
+    private
      procedure setval(AValue: tai);
     protected
      fval: tai;
     public
-     constructor create(_adetyp: ttypedconstkind; _def: tdef; _val: tai);
+     constructor create(_def: tdef; _val: tai);
+     destructor destroy; override;
      property val: tai read fval write setval;
    end;
 
@@ -69,7 +70,7 @@ type
      { iterator to walk over all individual items in the aggregate }
      tadeenumerator = class(tobject)
       private
-       fvalues: tfplist;
+       fvalues: tfpobjectlist;
        fvaluespos: longint;
        function getcurrent: tai_abstracttypedconst;
       public
@@ -80,7 +81,7 @@ type
      end;
 
     protected
-     fvalues: tfplist;
+     fvalues: tfpobjectlist;
      fisstring: boolean;
 
      { converts the existing data to a single tai_string }
@@ -89,10 +90,10 @@ type
     public
      constructor create(_adetyp: ttypedconstkind; _fdef: tdef);
      function getenumerator: tadeenumerator;
-     procedure addvalue(val: tai_abstracttypedconst);
+     procedure addvalue(val: tai_abstracttypedconst); virtual;
      function valuecount: longint;
      procedure insertvaluebeforepos(val: tai_abstracttypedconst; pos: longint);
-     function replacevalueatpos(val: tai_abstracttypedconst; pos: longint): tai_abstracttypedconst;
+     procedure replacevalueatpos(val: tai_abstracttypedconst; pos: longint);
      { change the type to a record, regardless of how the aggregate was created;
        the size of the original type and the record must match }
      procedure changetorecord(_def: trecorddef);
@@ -268,6 +269,10 @@ type
      function aggregate_kind(def: tdef): ttypedconstkind; virtual;
      { finalize the asmlist: add the necessary symbols etc }
      procedure finalize_asmlist(sym: tasmsymbol; def: tdef; section: TAsmSectiontype; const secname: TSymStr; alignment: shortint; const options: ttcasmlistoptions); virtual;
+     procedure finalize_asmlist_add_indirect_sym(sym: tasmsymbol; def: tdef; section: TAsmSectiontype; const secname: TSymStr; alignment: shortint; const options: ttcasmlistoptions); virtual;
+     { prepare finalization (common for the default and overridden versions }
+     procedure finalize_asmlist_prepare(const options: ttcasmlistoptions; var alignment: shortint);
+
      { functionality of the above for vectorized dead strippable sections }
      procedure finalize_vectorized_dead_strip_asmlist(def: tdef; const basename, itemname: TSymStr; st: tsymtable; alignment: shortint; options: ttcasmlistoptions); virtual;
 
@@ -346,6 +351,12 @@ type
      { emits a tasmlabofs as returned by emit_*string_const }
      procedure emit_string_offset(const ll: tasmlabofs; const strlength: longint; const st: tstringtype; const winlikewidestring: boolean; const charptrdef: tdef);virtual;
 
+     { emits a tasmlabofs as returned by begin_dynarray_const }
+     procedure emit_dynarray_offset(const ll:tasmlabofs;const arrlength:asizeint;const arrdef:tarraydef; const arrconstdatadef: trecorddef);virtual;
+     { starts a dynamic array constant so that its data can be emitted directly afterwards }
+     function begin_dynarray_const(arrdef:tdef;var startlab:tasmlabel;out arrlengthloc:ttypedconstplaceholder):tasmlabofs;virtual;
+     function end_dynarray_const(arrdef:tdef;arrlength:asizeint;arrlengthloc:ttypedconstplaceholder):tdef;virtual;
+
      { emit a shortstring constant, and return its def }
      function emit_shortstring_const(const str: shortstring): tdef;
      { emit a pchar string constant (the characters, not a pointer to them), and return its def }
@@ -357,6 +368,9 @@ type
      { emit an ordinal constant }
      procedure emit_ord_const(value: int64; def: tdef);
 
+     { emit a reference to a pooled shortstring constant }
+     procedure emit_pooled_shortstring_const_ref(const str:shortstring);
+
      { begin a potential aggregate type. Must be called for any type
        that consists of multiple tai constant data entries, or that
        represents an aggregate at the Pascal level (a record, a non-dynamic
@@ -379,7 +393,7 @@ type
         maxcrecordalign: specify maximum C record alignment (no equivalent in
           source code)
      }
-     function begin_anonymous_record(const optionalname: string; packrecords, recordalign, recordalignmin, maxcrecordalign: shortint): trecorddef; virtual;
+     function begin_anonymous_record(const optionalname: string; packrecords, recordalign, recordalignmin: shortint): trecorddef; virtual;
      function end_anonymous_record: trecorddef; virtual;
 
      { add a placeholder element at the current position that later can be
@@ -442,6 +456,12 @@ type
        supported this is equal to the header size }
      class function get_string_symofs(typ: tstringtype; winlikewidestring: boolean): pint; virtual;
 
+     { returns the offset of the array data relatve to dynamic array constant
+       labels. On most platforms, this is 0 (with the header at a negative
+       offset), but on some platforms such negative offsets are not supported
+       and thus this is equal to the header size }
+     class function get_dynarray_symofs:pint;virtual;
+
      { set the fieldvarsym whose data we will emit next; needed
        in case of variant records, so we know which part of the variant gets
        initialised. Also in case of objects, because the fieldvarsyms are spread
@@ -451,9 +471,10 @@ type
        record (also if that field is a nested anonymous record) }
      property next_field_name: TIDString write set_next_field_name;
     protected
-     { this one always return the actual offset, called by the above (and
+     { these ones always return the actual offset, called by the above (and
        overridden versions) }
      class function get_string_header_size(typ: tstringtype; winlikewidestring: boolean): pint;
+     class function get_dynarray_header_size:pint;
    end;
    ttai_typedconstbuilderclass = class of ttai_typedconstbuilder;
 
@@ -491,7 +512,7 @@ implementation
      cutils,
      verbose,globals,systems,widestr,
      fmodule,
-     symtable,defutil;
+     symtable,symutil,defutil;
 
 {****************************************************************************
                        taggregateinformation
@@ -568,8 +589,7 @@ implementation
             repeat
               inc(i);
               sym:=tsym(tabstractrecorddef(def).symtable.symlist[i]);
-            until (sym.typ=fieldvarsym) and
-              not(sp_static in sym.symoptions);
+            until is_normal_fieldvarsym(sym);
             curfield:=tfieldvarsym(sym);
             nextoffset:=curfield.fieldoffset;
             curindex:=i;
@@ -619,13 +639,20 @@ implementation
       end;
 
 
-   constructor tai_simpletypedconst.create(_adetyp: ttypedconstkind; _def: tdef; _val: tai);
+   constructor tai_simpletypedconst.create(_def: tdef; _val: tai);
      begin
-       inherited create(_adetyp,_def);
+       inherited create(tck_simple,_def);
        fval:=_val;
      end;
 
 
+   destructor tai_simpletypedconst.destroy;
+     begin
+       fval.free;
+       inherited destroy;
+     end;
+
+
 {****************************************************************************
               tai_aggregatetypedconst.tadeenumerator
  ****************************************************************************}
@@ -682,7 +709,7 @@ implementation
        { the "nil" def will be replaced with an array def of the appropriate
          size once we're finished adding data, so we don't create intermediate
          arraydefs all the time }
-       fvalues.add(tai_simpletypedconst.create(tck_simple,nil,newstr));
+       fvalues.add(tai_simpletypedconst.create(nil,newstr));
      end;
 
    procedure tai_aggregatetypedconst.add_to_string(strtai: tai_string; othertai: tai);
@@ -716,7 +743,7 @@ implementation
      begin
        inherited;
        fisstring:=false;
-       fvalues:=tfplist.create;
+       fvalues:=tfpobjectlist.create(true);
      end;
 
 
@@ -766,9 +793,9 @@ implementation
      end;
 
 
-   function tai_aggregatetypedconst.replacevalueatpos(val: tai_abstracttypedconst; pos: longint): tai_abstracttypedconst;
+   procedure tai_aggregatetypedconst.replacevalueatpos(val: tai_abstracttypedconst; pos: longint);
      begin
-       result:=tai_abstracttypedconst(fvalues[pos]);
+       { since fvalues owns its elements, it will automatically free the old value }
        fvalues[pos]:=val;
      end;
 
@@ -910,13 +937,7 @@ implementation
      end;
 
 
-   procedure ttai_typedconstbuilder.finalize_asmlist(sym: tasmsymbol; def: tdef; section: TAsmSectiontype; const secname: TSymStr; alignment: shortint; const options: ttcasmlistoptions);
-     var
-       prelist: tasmlist;
-       ptrdef : tdef;
-       symind : tasmsymbol;
-       indtcb : ttai_typedconstbuilder;
-       indsecname : tsymstr;
+   procedure ttai_typedconstbuilder.finalize_asmlist_prepare(const options: ttcasmlistoptions; var alignment: shortint);
      begin
        if tcalo_apply_constalign in options then
          alignment:=const_align(alignment);
@@ -932,7 +953,14 @@ implementation
              tcalo_vectorized_dead_strip_end]*options)<>[]) and
           not fvectorized_finalize_called then
          internalerror(2015110602);
+     end;
+
 
+   procedure ttai_typedconstbuilder.finalize_asmlist(sym: tasmsymbol; def: tdef; section: TAsmSectiontype; const secname: TSymStr; alignment: shortint; const options: ttcasmlistoptions);
+     var
+       prelist: tasmlist;
+     begin
+       finalize_asmlist_prepare(options, alignment);
        prelist:=tasmlist.create;
        { only now add items based on the symbolname, because it may be
          modified by the "section" specifier in case of a typed constant }
@@ -949,7 +977,14 @@ implementation
            new_section(prelist,section,secname,alignment);
          end
        else if tcalo_new_section in options then
-         new_section(prelist,section,secname,alignment)
+         begin
+           { insert ait_cutobject for smart-linking on targets
+             that do not support smarlinking based on sections,
+             like msdos }
+           if not (tf_smartlink_sections in target_info.flags) then
+             maybe_new_object_file(prelist);
+           new_section(prelist,section,secname,alignment);
+         end
        else
          prelist.concat(cai_align.Create(alignment));
 
@@ -987,11 +1022,19 @@ implementation
        fasmlist.concat(tai_symbol_end.Createname(sym.name));
        { free the temporary list }
        prelist.free;
+     end;
+
 
+   procedure ttai_typedconstbuilder.finalize_asmlist_add_indirect_sym(sym: tasmsymbol; def: tdef; section: TAsmSectiontype; const secname: TSymStr; alignment: shortint; const options: ttcasmlistoptions);
+     var
+       ptrdef : tdef;
+       symind : tasmsymbol;
+       indtcb : ttai_typedconstbuilder;
+       indsecname : tsymstr;
+     begin
        if (tcalo_data_force_indirect in options) and
-           not fvectorized_finalize_called and
-           (sym.bind in [AB_GLOBAL,AB_COMMON]) and
-           (sym.typ=AT_DATA) then
+          (sym.bind in [AB_GLOBAL,AB_COMMON]) and
+          (sym.typ=AT_DATA) then
          begin
            ptrdef:=cpointerdef.getreusable(def);
            symind:=current_asmdata.DefineAsmSymbol(sym.name,AB_INDIRECT,AT_DATA,ptrdef);
@@ -1067,6 +1110,7 @@ implementation
              secname:=make_mangledname(basename,st,'2_'+itemname);
            exclude(options,tcalo_vectorized_dead_strip_item);
          end;
+       current_module.linkorderedsymbols.concat(sym.Name);
        finalize_asmlist(sym,def,sectype,secname,alignment,options);
      end;
 
@@ -1083,6 +1127,7 @@ implementation
        if not fasmlist_finalized then
          begin
            finalize_asmlist(sym,def,section,secname,alignment,foptions);
+           finalize_asmlist_add_indirect_sym(sym,def,section,secname,alignment,foptions);
            fasmlist_finalized:=true;
          end;
        result:=fasmlist;
@@ -1110,6 +1155,16 @@ implementation
      end;
 
 
+   class function ttai_typedconstbuilder.get_dynarray_symofs:pint;
+     begin
+       { darwin's linker does not support negative offsets }
+       if not (target_info.system in systems_darwin) then
+         result:=0
+       else
+         result:=get_dynarray_header_size;
+     end;
+
+
    class function ttai_typedconstbuilder.get_string_header_size(typ: tstringtype; winlikewidestring: boolean): pint;
      var
        ansistring_header_size: pint;
@@ -1145,6 +1200,16 @@ implementation
      end;
 
 
+   class function ttai_typedconstbuilder.get_dynarray_header_size:pint;
+     begin
+       result:=
+         { reference count }
+         ptrsinttype.size +
+         { high value }
+         sizesinttype.size;
+     end;
+
+
    constructor ttai_typedconstbuilder.create(const options: ttcasmlistoptions);
      begin
        inherited create;
@@ -1331,7 +1396,7 @@ implementation
        result.ofs:=0;
        { pack the data, so that we don't add unnecessary null bytes after the
          constant string }
-       begin_anonymous_record('$'+get_dynstring_rec_name(stringtype,false,len),1,sizeof(TConstPtrUInt),1,1);
+       begin_anonymous_record('$'+get_dynstring_rec_name(stringtype,false,len),1,sizeof(TConstPtrUInt),1);
        string_symofs:=get_string_symofs(stringtype,false);
        { encoding }
        emit_tai(tai_const.create_16bit(encoding),u16inttype);
@@ -1543,7 +1608,7 @@ implementation
        if (typ<>st_widestring) or
           not winlike then
          begin
-           result:=crecorddef.create_global_internal('$'+name,1,1,1);
+           result:=crecorddef.create_global_internal('$'+name,1,1);
            { encoding }
            result.add_field_by_def('',u16inttype);
            { element size }
@@ -1569,8 +1634,7 @@ implementation
        else
          begin
            result:=crecorddef.create_global_internal('$'+name,4,
-             targetinfos[target_info.system]^.alignment.recordalignmin,
-             targetinfos[target_info.system]^.alignment.maxCrecordalign);
+             targetinfos[target_info.system]^.alignment.recordalignmin);
            { length in bytes }
            result.add_field_by_def('',s32inttype);
            streledef:=cwidechartype;
@@ -1601,7 +1665,7 @@ implementation
        datatcb.emit_tai(tai_string.create_pchar(s,len+1),datadef);
        datatcb.maybe_end_aggregate(datadef);
        ansistrrecdef:=datatcb.end_anonymous_record;
-       finish_internal_data_builder(datatcb,startlab,ansistrrecdef,const_align(sizeof(pointer)));
+       finish_internal_data_builder(datatcb,startlab,ansistrrecdef,const_align(voidpointertype.alignment));
      end;
 
 
@@ -1621,8 +1685,7 @@ implementation
            result.lab:=startlab;
            datatcb.begin_anonymous_record('$'+get_dynstring_rec_name(st_widestring,true,strlength),
              4,4,
-             targetinfos[target_info.system]^.alignment.recordalignmin,
-             targetinfos[target_info.system]^.alignment.maxCrecordalign);
+             targetinfos[target_info.system]^.alignment.recordalignmin);
            datatcb.emit_tai(Tai_const.Create_32bit(strlength*cwidechartype.size),s32inttype);
            { can we optimise by placing the string constant label at the
              required offset? }
@@ -1657,7 +1720,7 @@ implementation
        else
          { code generation for other sizes must be written }
          internalerror(200904271);
-       finish_internal_data_builder(datatcb,startlab,unicodestrrecdef,const_align(sizeof(pint)));
+       finish_internal_data_builder(datatcb,startlab,unicodestrrecdef,const_align(voidpointertype.alignment));
      end;
 
 
@@ -1667,6 +1730,52 @@ implementation
      end;
 
 
+   procedure ttai_typedconstbuilder.emit_dynarray_offset(const ll:tasmlabofs;const arrlength:asizeint;const arrdef:tarraydef; const arrconstdatadef: trecorddef);
+     begin
+       emit_tai(tai_const.create_sym_offset(ll.lab,ll.ofs),arrdef);
+     end;
+
+
+   function ttai_typedconstbuilder.begin_dynarray_const(arrdef:tdef;var startlab:tasmlabel;out arrlengthloc:ttypedconstplaceholder):tasmlabofs;
+     var
+       dynarray_symofs: asizeint;
+     begin
+       result.lab:=startlab;
+       result.ofs:=0;
+       { pack the data, so that we don't add unnecessary null bytes after the
+         constant string }
+       begin_anonymous_record('',1,sizeof(TConstPtrUInt),1);
+       dynarray_symofs:=get_dynarray_symofs;
+       { what to do if ptrsinttype <> sizesinttype??? }
+       emit_tai(tai_const.create_sizeint(-1),ptrsinttype);
+       inc(result.ofs,ptrsinttype.size);
+       arrlengthloc:=emit_placeholder(sizesinttype);
+       inc(result.ofs,sizesinttype.size);
+       if dynarray_symofs=0 then
+         begin
+           { results in slightly more efficient code }
+           emit_tai(tai_label.create(result.lab),arrdef);
+           result.ofs:=0;
+           { create new label of the same kind (including whether or not the
+             name starts with target_asm.labelprefix in case it's AB_LOCAL,
+             so we keep the difference depending on whether the original was
+             allocated via getstatic/getlocal/getglobal datalabel) }
+           startlab:=tasmlabel.create(current_asmdata.AsmSymbolDict,startlab.name+'$dynarrlab',startlab.bind,startlab.typ);
+         end;
+       { sanity check }
+       if result.ofs<>dynarray_symofs then
+         internalerror(2018020601);
+     end;
+
+
+   function ttai_typedconstbuilder.end_dynarray_const(arrdef:tdef;arrlength:asizeint;arrlengthloc:ttypedconstplaceholder):tdef;
+     begin
+       { we emit the high value, not the count }
+       arrlengthloc.replace(tai_const.Create_sizeint(arrlength-1),sizesinttype);
+       result:=end_anonymous_record;
+     end;
+
+
    function ttai_typedconstbuilder.emit_shortstring_const(const str: shortstring): tdef;
      begin
        { we use an arraydef instead of a shortstringdef, because we don't have
@@ -1752,6 +1861,56 @@ implementation
      end;
 
 
+   procedure ttai_typedconstbuilder.emit_pooled_shortstring_const_ref(const str:shortstring);
+     var
+       pool : thashset;
+       entry : phashsetitem;
+       strlab : tasmlabel;
+       l : longint;
+       pc : pansichar;
+       datadef : tdef;
+       strtcb : ttai_typedconstbuilder;
+     begin
+       pool:=current_asmdata.ConstPools[sp_shortstr];
+
+       entry:=pool.FindOrAdd(@str[1],length(str));
+
+       { :-(, we must generate a new entry }
+       if not assigned(entry^.Data) then
+         begin
+           current_asmdata.getglobaldatalabel(strlab);
+
+           { include length and terminating zero for quick conversion to pchar }
+           l:=length(str);
+           getmem(pc,l+2);
+           move(str[1],pc[1],l);
+           pc[0]:=chr(l);
+           pc[l+1]:=#0;
+
+           datadef:=carraydef.getreusable(cansichartype,l+2);
+
+           { we start a new constbuilder as we don't know whether we're called
+             from inside an internal constbuilder }
+           strtcb:=ctai_typedconstbuilder.create([tcalo_is_lab,tcalo_make_dead_strippable,tcalo_apply_constalign]);
+
+           strtcb.maybe_begin_aggregate(datadef);
+           strtcb.emit_tai(Tai_string.Create_pchar(pc,l+2),datadef);
+           strtcb.maybe_end_aggregate(datadef);
+
+           current_asmdata.asmlists[al_typedconsts].concatList(
+             strtcb.get_final_asmlist(strlab,datadef,sec_rodata_norel,strlab.name,const_align(sizeof(pint)))
+           );
+           strtcb.free;
+
+           entry^.Data:=strlab;
+         end
+       else
+         strlab:=tasmlabel(entry^.Data);
+
+       emit_tai(tai_const.Create_sym(strlab),charpointertype);
+     end;
+
+
    procedure ttai_typedconstbuilder.maybe_begin_aggregate(def: tdef);
      begin
        begin_aggregate_internal(def,false);
@@ -1764,7 +1923,7 @@ implementation
      end;
 
 
-   function ttai_typedconstbuilder.begin_anonymous_record(const optionalname: string; packrecords, recordalign, recordalignmin, maxcrecordalign: shortint): trecorddef;
+   function ttai_typedconstbuilder.begin_anonymous_record(const optionalname: string; packrecords, recordalign, recordalignmin: shortint): trecorddef;
      var
        anonrecorddef: trecorddef;
        typesym: ttypesym;
@@ -1785,7 +1944,7 @@ implementation
              end;
          end;
        { create skeleton def }
-       anonrecorddef:=crecorddef.create_global_internal(optionalname,packrecords,recordalignmin,maxcrecordalign);
+       anonrecorddef:=crecorddef.create_global_internal(optionalname,packrecords,recordalignmin);
        trecordsymtable(anonrecorddef.symtable).recordalignment:=recordalign;
        { generic aggregate housekeeping }
        begin_aggregate_internal(anonrecorddef,true);
@@ -1924,7 +2083,7 @@ implementation
          begin
            sym:=search_struct_member_no_helper(tabstractrecorddef(curdef),fields[i]);
            if not assigned(sym) or
-              (sym.typ<>fieldvarsym) or
+              not is_normal_fieldvarsym(sym) or
               ((i<>high(fields)) and
                not(tfieldvarsym(sym).vardef.typ in [objectdef,recorddef])) then
              internalerror(2015071505);
@@ -2040,7 +2199,7 @@ implementation
    procedure tlowleveltypedconstplaceholder.replace(ai: tai; d: tdef);
      begin
        if d<>def then
-         internalerror(2015091001);
+         internalerror(2015091007);
        list.insertafter(ai,insertpos);
        list.remove(insertpos);
        insertpos.free;

+ 112 - 4
compiler/aasmdata.pas

@@ -96,7 +96,8 @@ interface
          sp_objcprotocolrefs,
          sp_varsets,
          sp_floats,
-         sp_guids
+         sp_guids,
+         sp_paraloc
       );
       
     const
@@ -134,6 +135,22 @@ interface
          section_count : longint;
          constructor create;
          function  getlasttaifilepos : pfileposinfo;
+         { inserts another List at the begin and make this List empty }
+         procedure insertList(p : TLinkedList); override;
+         { inserts another List before the provided item and make this List empty }
+         procedure insertListBefore(Item:TLinkedListItem;p : TLinkedList); override;
+         { inserts another List after the provided item and make this List empty }
+         procedure insertListAfter(Item:TLinkedListItem;p : TLinkedList); override;
+         { concats another List at the end and make this List empty }
+         procedure concatList(p : TLinkedList); override;
+         { concats another List at the start and makes a copy
+           the list is ordered in reverse.
+         }
+         procedure insertListcopy(p : TLinkedList); override;
+         { concats another List at the end and makes a copy }
+         procedure concatListcopy(p : TLinkedList); override;
+         { removes all items from the list, the items are not freed }
+         procedure RemoveAll; override;
       end;
 
       TAsmCFI=class
@@ -143,10 +160,13 @@ interface
         procedure generate_code(list:TAsmList);virtual;
         procedure start_frame(list:TAsmList);virtual;
         procedure end_frame(list:TAsmList);virtual;
+        procedure outmost_frame(list:TAsmList);virtual;
         procedure cfa_offset(list:TAsmList;reg:tregister;ofs:longint);virtual;
         procedure cfa_restore(list:TAsmList;reg:tregister);virtual;
         procedure cfa_def_cfa_register(list:TAsmList;reg:tregister);virtual;
         procedure cfa_def_cfa_offset(list:TAsmList;ofs:longint);virtual;
+        function get_frame_start: TAsmLabel;virtual;
+        function get_cfa_list : TAsmList;virtual;
       end;
       TAsmCFIClass=class of TAsmCFI;
 
@@ -178,6 +198,7 @@ interface
         { asmsymbol }
         function  DefineAsmSymbolByClass(symclass: TAsmSymbolClass; const s : TSymStr;_bind:TAsmSymBind;_typ:Tasmsymtype; def: tdef) : TAsmSymbol; virtual;
         function  DefineAsmSymbol(const s : TSymStr;_bind:TAsmSymBind;_typ:Tasmsymtype; def: tdef) : TAsmSymbol;
+        function  DefineProcAsmSymbol(pd: tdef; const s: TSymStr; global: boolean): TAsmSymbol;
         function  WeakRefAsmSymbol(const s : TSymStr;_typ:Tasmsymtype) : TAsmSymbol;
         function  RefAsmSymbol(const s : TSymStr;_typ:Tasmsymtype;indirect:boolean=false) : TAsmSymbol;
         function  GetAsmSymbol(const s : TSymStr) : TAsmSymbol;
@@ -228,6 +249,7 @@ implementation
 
     uses
       verbose,
+      globals,
       symconst,
       aasmtai;
 
@@ -268,6 +290,11 @@ implementation
       end;
 
 
+    procedure TAsmCFI.outmost_frame(list: TAsmList);
+      begin
+      end;
+
+
     procedure TAsmCFI.cfa_offset(list:TAsmList;reg:tregister;ofs:longint);
       begin
       end;
@@ -287,6 +314,18 @@ implementation
       begin
       end;
 
+
+    function TAsmCFI.get_frame_start: TAsmLabel;
+      begin
+        Result:=nil;
+      end;
+
+
+    function TAsmCFI.get_cfa_list: TAsmList;
+      begin
+        Result:=nil;
+      end;
+
 {*****************************************************************************
                                  TTCInitItem
 *****************************************************************************}
@@ -337,6 +376,59 @@ implementation
       end;
 
 
+    procedure TAsmList.insertList(p : TLinkedList);
+      begin
+        inherited insertList(p);
+        inc(section_count,TAsmList(p).section_count);
+        TAsmList(p).section_count:=0;
+      end;
+
+
+    procedure TAsmList.insertListBefore(Item : TLinkedListItem; p : TLinkedList);
+      begin
+        inherited insertListBefore(Item,p);
+        inc(section_count,TAsmList(p).section_count);
+        TAsmList(p).section_count:=0;
+      end;
+
+
+    procedure TAsmList.insertListAfter(Item : TLinkedListItem; p : TLinkedList);
+      begin
+        inherited insertListAfter(Item,p);
+        inc(section_count,TAsmList(p).section_count);
+        TAsmList(p).section_count:=0;
+      end;
+
+
+    procedure TAsmList.concatList(p : TLinkedList);
+      begin
+        inherited concatList(p);
+        inc(section_count,TAsmList(p).section_count);
+        TAsmList(p).section_count:=0;
+      end;
+
+
+    procedure TAsmList.insertListcopy(p : TLinkedList);
+      begin
+        inherited insertListcopy(p);
+        inc(section_count,TAsmList(p).section_count);
+     end;
+
+
+    procedure TAsmList.concatListcopy(p : TLinkedList);
+      begin
+        inherited concatListcopy(p);
+        inc(section_count,TAsmList(p).section_count);
+      end;
+
+
+    procedure TAsmList.RemoveAll;
+      begin
+         inherited RemoveAll;
+         section_count:=0;
+      end;
+
+
 {****************************************************************************
                                 TAsmData
 ****************************************************************************}
@@ -423,8 +515,8 @@ implementation
         CurrAsmList:=TAsmList.create;
         for hal:=low(TAsmListType) to high(TAsmListType) do
           AsmLists[hal]:=TAsmList.create;
-        WideInits :=TLinkedList.create;
-        ResStrInits:=TLinkedList.create;
+        WideInits :=TAsmList.create;
+        ResStrInits:=TAsmList.create;
         { CFI }
         FAsmCFI:=CAsmCFI.Create;
       end;
@@ -482,6 +574,21 @@ implementation
       end;
 
 
+    function TAsmData.DefineProcAsmSymbol(pd: tdef; const s: TSymStr; global: boolean): TAsmSymbol;
+      begin
+        { The condition to use global or local symbol must match
+          the code written in hlcg.gen_proc_symbol to
+          avoid change from AB_LOCAL to AB_GLOBAL, which generates
+          erroneous code (at least for targets using GOT) }
+        if global or
+           (cs_profile in current_settings.moduleswitches) then
+          result:=DefineAsmSymbol(s,AB_GLOBAL,AT_FUNCTION,pd)
+        else if tf_supports_hidden_symbols in target_info.flags then
+          result:=DefineAsmSymbol(s,AB_PRIVATE_EXTERN,AT_FUNCTION,pd)
+        else
+          result:=DefineAsmSymbol(s,AB_LOCAL,AT_FUNCTION,pd);
+      end;
+
     function TAsmData.RefAsmSymbol(const s : TSymStr;_typ:Tasmsymtype;indirect:boolean) : TAsmSymbol;
       var
         namestr : TSymStr;
@@ -604,7 +711,8 @@ initialization
   memasmlists:=TMemDebug.create('AsmLists');
   memasmlists.stop;
 {$endif MEMDEBUG}
-  CAsmCFI:=TAsmCFI;
+  if not(assigned(CAsmCFI)) then
+    CAsmCFI:=TAsmCFI;
 
 finalization
 {$ifdef MEMDEBUG}

+ 1 - 1
compiler/aasmdef.pas

@@ -56,7 +56,7 @@ function TAsmDataDef.DefineAsmSymbolByClass(symclass: TAsmSymbolClass; const s:
     result:=DefineAsmSymbolByClassBase(symclass,s,_bind,_typ,def,wasdefined);
     { define the indirect asmsymbol if necessary }
     if not wasdefined and
-       (_bind in [AB_GLOBAL,AB_COMMON]) and
+       (_bind in [AB_GLOBAL,AB_COMMON,AB_PRIVATE_EXTERN]) and
        (_typ<>AT_DATA_NOINDIRECT) and
        (((_typ=AT_DATA) and
          (tf_supports_packages in target_info.flags) and

+ 4 - 0
compiler/aasmsym.pas

@@ -53,6 +53,8 @@ implementation
         case o.typ of
           top_local :
             o.localoper^.localsymderef.build(tlocalvarsym(o.localoper^.localsym));
+          else
+            ;
         end;
       end;
 
@@ -65,6 +67,8 @@ implementation
             end;
           top_local :
             o.localoper^.localsym:=tlocalvarsym(o.localoper^.localsymderef.resolve);
+          else
+            ;
         end;
       end;
 

+ 375 - 62
compiler/aasmtai.pas

@@ -87,9 +87,15 @@ interface
           ait_llvmins, { llvm instruction }
           ait_llvmalias, { alias for a symbol }
           ait_llvmdecl, { llvm symbol declaration (global/external variable, external procdef) }
+          ait_llvmmetadatanode, (* llvm metadata node: !id = !{type value, ...} *)
+          ait_llvmmetadatareftypedconst, { reference to metadata inside a metadata constant }
+          ait_llvmmetadatarefoperand, { llvm metadata referece: !metadataname !id }
 {$endif}
           { SEH directives used in ARM,MIPS and x86_64 COFF targets }
-          ait_seh_directive
+          ait_seh_directive,
+          { Dwarf CFI directive }
+          ait_cfi,
+          ait_eabi_attribute
           );
 
         taiconst_type = (
@@ -145,7 +151,14 @@ interface
           { offset of symbol's GOT slot in GOT }
           aitconst_got,
           { offset of symbol itself from GOT }
-          aitconst_gotoff_symbol
+          aitconst_gotoff_symbol,
+          { offset in TLS block }
+          aitconst_dtpoff,
+          { ARM TLS code }
+          aitconst_gottpoff,
+          aitconst_tpoff,
+          aitconst_tlsgd,
+          aitconst_tlsdesc
         );
 
         tairealconsttype = (
@@ -216,8 +229,13 @@ interface
           'llvmins',
           'llvmalias',
           'llvmdecl',
+          'llvmmetadata',
+          'llvmmetadatareftc',
+          'llvmmetadatarefop',
 {$endif}
-          'seh_directive'
+          'cfi',
+          'seh_directive',
+          'eabi_attribute'
           );
 
     type
@@ -232,6 +250,7 @@ interface
 {$if defined(arm) or defined(aarch64)}
        ,top_conditioncode
        ,top_shifterop
+       ,top_realconst
 {$endif defined(arm) or defined(aarch64)}
 {$ifdef m68k}
        { m68k only }
@@ -260,7 +279,12 @@ interface
        ,top_cond
        ,top_para
        ,top_asmlist
+       ,top_callingconvention
 {$endif llvm}
+{$if defined(riscv32) or defined(riscv64)}
+       ,top_fenceflags
+       ,top_roundingmode
+{$endif defined(riscv32) or defined(riscv64)}
        );
 
       { kinds of operations that an instruction can perform on an operand }
@@ -270,6 +294,9 @@ interface
         localsym : pointer;
         localsymderef : tderef;
         localsymofs : longint;
+{$ifdef x86}
+        localsegment,
+{$endif x86}
         localindexreg : tregister;
         localscale : byte;
         localgetoffset,
@@ -307,8 +334,13 @@ interface
 {$endif JVM}
 {$ifdef llvm}
                      ait_llvmdecl,
+                     ait_llvmmetadatanode,
+                     ait_llvmmetadatareftypedconst,
+                     ait_llvmmetadatarefoperand,
 {$endif llvm}
-                     ait_seh_directive
+                     ait_seh_directive,
+                     ait_cfi,
+                     ait_eabi_attribute
                     ];
 
 
@@ -354,7 +386,11 @@ interface
           available on the specified CPU; this represents directives such as
           NASM's 'CPU 686' or MASM/TASM's '.686p'. Might not be supported by
           all assemblers. }
-        asd_cpu
+        asd_cpu,
+        { for the OMF object format }
+        asd_omf_linnum_line,
+        { RISC-V }
+        asd_option
       );
 
       TAsmSehDirective=(
@@ -362,10 +398,11 @@ interface
           ash_endprologue,ash_handler,ash_handlerdata,
           ash_eh,ash_32,ash_no32,
           ash_setframe,ash_stackalloc,ash_pushreg,
-          ash_savereg,ash_savexmm,ash_pushframe
+          ash_savereg,ash_savexmm,ash_pushframe,
+          ash_pushnv,ash_savenv
         );
 
-      TSymbolPairKind = (spk_set, spk_thumb_set, spk_localentry);
+      TSymbolPairKind = (spk_set, spk_set_global, spk_thumb_set, spk_localentry);
 
 
     const
@@ -391,17 +428,22 @@ interface
         { ARM }
         'thumb_func',
         'code',
-        'cpu'
+        'cpu',
+        { for the OMF object format }
+        'omf_line',
+        { RISC-V }
+        'option'
       );
       sehdirectivestr : array[TAsmSehDirective] of string[16]=(
         '.seh_proc','.seh_endproc',
         '.seh_endprologue','.seh_handler','.seh_handlerdata',
         '.seh_eh','.seh_32','seh_no32',
         '.seh_setframe','.seh_stackalloc','.seh_pushreg',
-        '.seh_savereg','.seh_savexmm','.seh_pushframe'
+        '.seh_savereg','.seh_savexmm','.seh_pushframe',
+        '.pushnv','.savenv'
       );
       symbolpairkindstr: array[TSymbolPairKind] of string[11]=(
-        '.set', '.thumb_set', '.localentry'
+        '.set', '.set', '.thumb_set', '.localentry'
       );
 
     type
@@ -410,6 +452,9 @@ interface
         { please keep the size of this record <=12 bytes and keep it properly aligned }
         toper = record
           ot : longint;
+        {$ifdef x86}
+          vopext: smallint;
+        {$ENDIF}
           case typ : toptype of
             top_none   : ();
             top_reg    : (reg:tregister);
@@ -426,6 +471,7 @@ interface
         {$if defined(arm) or defined(aarch64)}
             top_shifterop : (shifterop : pshifterop);
             top_conditioncode : (cc : TAsmCond);
+            top_realconst : (val_real:bestreal);
         {$endif defined(arm) or defined(aarch64)}
         {$ifdef m68k}
             top_regset : (dataregset,addrregset,fpuregset: tcpuregisterset);
@@ -451,7 +497,12 @@ interface
             top_fpcond : (fpcond: tllvmfpcmp);
             top_para   : (paras: tfplist);
             top_asmlist : (asmlist: tasmlist);
+            top_callingconvention: (callingconvention: tproccalloption);
         {$endif llvm}
+        {$if defined(riscv32) or defined(riscv64)}
+            top_fenceflags : (fenceflags : TFenceFlags);
+            top_roundingmode : (roundingmode : TRoundingMode);
+        {$endif defined(riscv32) or defined(riscv64)}
         end;
         poper=^toper;
 
@@ -509,6 +560,7 @@ interface
           constructor Create_Global(_sym:tasmsymbol;siz:longint);
           constructor Createname(const _name : string;_symtyp:Tasmsymtype;siz:longint;def:tdef);
           constructor Createname_global(const _name : string;_symtyp:Tasmsymtype;siz:longint;def:tdef);
+          constructor Createname_hidden(const _name : string;_symtyp:Tasmsymtype;siz:longint;def:tdef);
           constructor Createname_global_value(const _name : string;_symtyp:Tasmsymtype;siz:longint;val:ptruint;def:tdef);
           constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
           procedure ppuwrite(ppufile:tcompilerppufile);override;
@@ -559,14 +611,17 @@ interface
           function getcopy:tlinkedlistitem;override;
        end;
 
-
        { Generates a section / segment directive }
        tai_section = class(tai)
           sectype  : TAsmSectiontype;
           secorder : TasmSectionorder;
-          secalign : byte;
+          secalign : longint;
           name     : pshortstring;
-          sec      : TObjSection; { used in binary writer }
+          { used in binary writer }
+          sec      : TObjSection;
+          { used only by ELF so far }
+          secflags : TSectionFlags;
+          secprogbits : TSectionProgbits;
           destructor Destroy;override;
           constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
           procedure ppuwrite(ppufile:tcompilerppufile);override;
@@ -574,7 +629,7 @@ interface
          private
           { this constructor is made private on purpose }
           { because sections should be created via new_section() }
-          constructor Create(Asectype:TAsmSectiontype;const Aname:string;Aalign:byte;Asecorder:TasmSectionorder=secorder_default);
+          constructor Create(Asectype:TAsmSectiontype;const Aname:string;Aalign:longint;Asecorder:TasmSectionorder=secorder_default);
 {$pop}
        end;
 
@@ -584,8 +639,9 @@ interface
           is_global : boolean;
           sym       : tasmsymbol;
           size      : asizeint;
-          constructor Create(const _name : string;_size : asizeint; def: tdef);
-          constructor Create_global(const _name : string;_size : asizeint; def: tdef);
+          constructor Create(const _name: string; _size: asizeint; def: tdef; _typ: Tasmsymtype);
+          constructor Create_hidden(const _name: string; _size: asizeint; def: tdef; _typ: Tasmsymtype);
+          constructor Create_global(const _name: string; _size: asizeint; def: tdef; _typ: Tasmsymtype);
           constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
           procedure ppuwrite(ppufile:tcompilerppufile);override;
           procedure derefimpl;override;
@@ -604,6 +660,9 @@ interface
           symofs,
           value   : int64;
           consttype : taiconst_type;
+          { sleb128 and uleb128 values have a varying length, by calling FixSize their size can be fixed
+            to avoid that other offsets need to be changed. The value to write is stored in fixed_size }
+          fixed_size : byte;
           { we use for the 128bit int64/qword for now because I can't imagine a
             case where we need 128 bit now (FK) }
           constructor Create(_typ:taiconst_type;_value : int64);
@@ -625,6 +684,10 @@ interface
 {$ifdef i8086}
           constructor Create_sym_near(_sym:tasmsymbol);
           constructor Create_sym_far(_sym:tasmsymbol);
+          constructor Createname_near(const name:string;ofs:asizeint);
+          constructor Createname_far(const name:string;ofs:asizeint);
+          constructor Createname_near(const name:string;_symtyp:Tasmsymtype;ofs:asizeint);
+          constructor Createname_far(const name:string;_symtyp:Tasmsymtype;ofs:asizeint);
 {$endif i8086}
           constructor Create_type_sym(_typ:taiconst_type;_sym:tasmsymbol);
           constructor Create_sym_offset(_sym:tasmsymbol;ofs:asizeint);
@@ -644,6 +707,9 @@ interface
           constructor Create_int_codeptr_unaligned(_value: int64);
           constructor Create_int_dataptr(_value: int64);
           constructor Create_int_dataptr_unaligned(_value: int64);
+{$ifdef avr}
+          constructor Create_int_dataptr_unaligned(_value: int64; size: taiconst_type);
+{$endif}
 {$ifdef i8086}
           constructor Create_seg_name(const name:string);
           constructor Create_dgroup;
@@ -654,6 +720,9 @@ interface
           procedure derefimpl;override;
           function getcopy:tlinkedlistitem;override;
           function size:longint;
+          { sleb128 and uleb128 values have a varying length, by calling FixSize their size can be fixed
+            to avoid that other offsets need to be changed. The value to write is stored in fixed_size }
+          Procedure FixSize;
        end;
 
        { floating point const }
@@ -795,7 +864,7 @@ interface
            procedure derefimpl;override;
            procedure SetCondition(const c:TAsmCond);
            procedure allocate_oper(opers:longint);
-           procedure loadconst(opidx:longint;l:aint);
+           procedure loadconst(opidx:longint;l:tcgint);
            procedure loadsymbol(opidx:longint;s:tasmsymbol;sofs:longint);
            procedure loadlocal(opidx:longint;s:pointer;sofs:longint;indexreg:tregister;scale:byte;getoffset,forceref:boolean);
            procedure loadref(opidx:longint;const r:treference);
@@ -822,11 +891,13 @@ interface
         { alignment for operator }
         tai_align_abstract = class(tai)
            aligntype : byte;   { 1 = no align, 2 = word align, 4 = dword align }
+           maxbytes  : byte;   { if needed bytes would be larger than maxbyes, alignment is ignored }
            fillsize  : byte;   { real size to fill }
            fillop    : byte;   { value to fill with - optional }
            use_op    : boolean;
            constructor Create(b:byte);virtual;
            constructor Create_op(b: byte; _op: byte);virtual;
+           constructor create_max(b: byte; max: byte);virtual;
            constructor Create_zeros(b:byte);
            constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
            procedure ppuwrite(ppufile:tcompilerppufile);override;
@@ -911,6 +982,18 @@ interface
           procedure ppuwrite(ppufile:tcompilerppufile);override;
         end;
 
+        teattrtyp = (eattrtype_none,eattrtype_dword,eattrtype_ntbs);
+        tai_eabi_attribute = class(tai)
+          eattr_typ : teattrtyp;
+          tag,value : dword;
+          valuestr : pstring;
+          constructor create(atag,avalue : dword);
+          constructor create(atag : dword;const avalue : string);
+          destructor destroy;override;
+          constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
+          procedure ppuwrite(ppufile:tcompilerppufile);override;
+        end;
+
     var
       { array with all class types for tais }
       aiclass : taiclassarray;
@@ -924,7 +1007,7 @@ interface
       add_reg_instruction_hook : tadd_reg_instruction_proc;
 
     procedure maybe_new_object_file(list:TAsmList);
-    procedure new_section(list:TAsmList;Asectype:TAsmSectiontype;const Aname:string;Aalign:byte;Asecorder:TasmSectionorder=secorder_default);
+    function new_section(list:TAsmList;Asectype:TAsmSectiontype;const Aname:string;Aalign:byte;Asecorder:TasmSectionorder=secorder_default) : tai_section;
 
     function ppuloadai(ppufile:tcompilerppufile):tai;
     procedure ppuwriteai(ppufile:tcompilerppufile;n:tai);
@@ -933,14 +1016,17 @@ interface
 implementation
 
     uses
+{$ifdef x86}
+      aasmcpu,
+{$endif x86}
       SysUtils,
       verbose,
-      globals;
+      globals,
+      ppu;
 
     const
       pputaimarker = 254;
 
-
 {****************************************************************************
                                  Helpers
  ****************************************************************************}
@@ -952,9 +1038,10 @@ implementation
       end;
 
 
-    procedure new_section(list:TAsmList;Asectype:TAsmSectiontype;const Aname:string;Aalign:byte;Asecorder:TasmSectionorder=secorder_default);
+    function new_section(list:TAsmList;Asectype:TAsmSectiontype;const Aname:string;Aalign:byte;Asecorder:TasmSectionorder=secorder_default) : tai_section;
       begin
-        list.concat(tai_section.create(Asectype,Aname,Aalign,Asecorder));
+        Result:=tai_section.create(Asectype,Aname,Aalign,Asecorder);
+        list.concat(Result);
         inc(list.section_count);
         list.concat(cai_align.create(Aalign));
       end;
@@ -1167,13 +1254,14 @@ implementation
                              TAI_SECTION
  ****************************************************************************}
 
-    constructor tai_section.Create(Asectype:TAsmSectiontype;const Aname:string;Aalign:byte;Asecorder:TasmSectionorder=secorder_default);
+    constructor tai_section.Create(Asectype:TAsmSectiontype;const Aname:string;Aalign:longint;Asecorder:TasmSectionorder=secorder_default);
       begin
         inherited Create;
         typ:=ait_section;
         sectype:=asectype;
         secalign:=Aalign;
         secorder:=Asecorder;
+        TObjData.sectiontype2progbitsandflags(sectype,secprogbits,secflags);
         name:=stringdup(Aname);
         sec:=nil;
       end;
@@ -1183,8 +1271,10 @@ implementation
       begin
         inherited ppuload(t,ppufile);
         sectype:=TAsmSectiontype(ppufile.getbyte);
-        secalign:=ppufile.getbyte;
+        secalign:=ppufile.getlongint;
         name:=ppufile.getpshortstring;
+        ppufile.getset(tppuset1(secflags));
+        secprogbits:=TSectionProgbits(ppufile.getbyte);
         sec:=nil;
       end;
 
@@ -1199,8 +1289,10 @@ implementation
       begin
         inherited ppuwrite(ppufile);
         ppufile.putbyte(byte(sectype));
-        ppufile.putbyte(secalign);
+        ppufile.putlongint(secalign);
         ppufile.putstring(name^);
+        ppufile.putset(tppuset1(secflags));
+        ppufile.putbyte(byte(secprogbits));
       end;
 
 
@@ -1208,12 +1300,12 @@ implementation
                              TAI_DATABLOCK
  ****************************************************************************}
 
-    constructor tai_datablock.Create(const _name : string;_size : asizeint; def: tdef);
+    constructor tai_datablock.Create(const _name : string;_size : asizeint; def: tdef; _typ:Tasmsymtype);
 
       begin
          inherited Create;
          typ:=ait_datablock;
-         sym:=current_asmdata.DefineAsmSymbol(_name,AB_LOCAL,AT_DATA,def);
+         sym:=current_asmdata.DefineAsmSymbol(_name,AB_LOCAL,_typ,def);
          { keep things aligned }
          if _size<=0 then
            _size:=sizeof(aint);
@@ -1221,12 +1313,29 @@ implementation
          is_global:=false;
       end;
 
+    constructor tai_datablock.Create_hidden(const _name: string; _size: asizeint; def: tdef; _typ:Tasmsymtype);
+      begin
+        if tf_supports_hidden_symbols in target_info.flags then
+          begin
+            inherited Create;
+            typ:=ait_datablock;
+            sym:=current_asmdata.DefineAsmSymbol(_name,AB_PRIVATE_EXTERN,_typ,def);
+            { keep things aligned }
+            if _size<=0 then
+              _size:=sizeof(aint);
+            size:=_size;
+            is_global:=true;
+          end
+        else
+          Create(_name,_size,def,_typ);
+      end;
 
-    constructor tai_datablock.Create_global(const _name : string;_size : asizeint; def: tdef);
+
+    constructor tai_datablock.Create_global(const _name : string;_size : asizeint; def: tdef; _typ:Tasmsymtype);
       begin
          inherited Create;
          typ:=ait_datablock;
-         sym:=current_asmdata.DefineAsmSymbol(_name,AB_GLOBAL,AT_DATA,def);
+         sym:=current_asmdata.DefineAsmSymbol(_name,AB_GLOBAL,_typ,def);
          { keep things aligned }
          if _size<=0 then
            _size:=sizeof(aint);
@@ -1240,7 +1349,7 @@ implementation
         inherited Create;
         sym:=ppufile.getasmsymbol;
         size:=ppufile.getaint;
-        is_global:=boolean(ppufile.getbyte);
+        is_global:=ppufile.getboolean;
       end;
 
 
@@ -1249,7 +1358,7 @@ implementation
         inherited ppuwrite(ppufile);
         ppufile.putasmsymbol(sym);
         ppufile.putaint(size);
-        ppufile.putbyte(byte(is_global));
+        ppufile.putboolean(is_global);
       end;
 
 
@@ -1309,6 +1418,20 @@ implementation
          is_global:=true;
       end;
 
+    constructor tai_symbol.Createname_hidden(const _name: string; _symtyp: Tasmsymtype; siz: longint; def: tdef);
+      begin
+        if tf_supports_hidden_symbols in target_info.flags then
+          begin
+            inherited Create;
+            typ:=ait_symbol;
+            sym:=current_asmdata.DefineAsmSymbol(_name,AB_PRIVATE_EXTERN,_symtyp,def);
+            size:=siz;
+            is_global:=true;
+          end
+        else
+          Createname(_name, _symtyp, siz, def);
+      end;
+
 
     constructor tai_symbol.createname_global_value(const _name: string;_symtyp: tasmsymtype; siz: longint; val: ptruint;def:tdef);
       begin
@@ -1323,7 +1446,7 @@ implementation
         inherited ppuload(t,ppufile);
         sym:=ppufile.getasmsymbol;
         size:=ppufile.getlongint;
-        is_global:=boolean(ppufile.getbyte);
+        is_global:=ppufile.getboolean;
       end;
 
 
@@ -1332,7 +1455,7 @@ implementation
         inherited ppuwrite(ppufile);
         ppufile.putasmsymbol(sym);
         ppufile.putlongint(size);
-        ppufile.putbyte(byte(is_global));
+        ppufile.putboolean(is_global);
       end;
 
 
@@ -1616,11 +1739,40 @@ implementation
          consttype:=aitconst_ptr;
       end;
 
+
     constructor tai_const.Create_sym_far(_sym: tasmsymbol);
       begin
         self.create_sym(_sym);
         consttype:=aitconst_farptr;
       end;
+
+
+    constructor tai_const.Createname_near(const name:string;ofs:asizeint);
+      begin
+        self.Createname(name,ofs);
+        consttype:=aitconst_ptr;
+      end;
+
+
+    constructor tai_const.Createname_far(const name:string;ofs:asizeint);
+      begin
+        self.Createname(name,ofs);
+        consttype:=aitconst_farptr;
+      end;
+
+
+    constructor tai_const.Createname_near(const name:string;_symtyp:Tasmsymtype;ofs:asizeint);
+      begin
+        self.Createname(name,_symtyp,ofs);
+        consttype:=aitconst_ptr;
+      end;
+
+
+    constructor tai_const.Createname_far(const name:string;_symtyp:Tasmsymtype;ofs:asizeint);
+      begin
+        self.Createname(name,_symtyp,ofs);
+        consttype:=aitconst_farptr;
+      end;
 {$endif i8086}
 
 
@@ -1691,7 +1843,7 @@ implementation
       end;
 
 
-    constructor tai_const.Create_rel_sym_offset(_typ: taiconst_type; _sym,_endsym: tasmsymbol; _ofs: int64);
+    constructor tai_const.Create_rel_sym_offset(_typ: taiconst_type; _sym, _endsym: tasmsymbol; _ofs: int64);
        begin
          self.create_sym_offset(_sym,_ofs);
          consttype:=_typ;
@@ -1832,6 +1984,20 @@ implementation
       end;
 
 
+{$ifdef avr}
+    constructor tai_const.Create_int_dataptr_unaligned(_value: int64;
+      size: taiconst_type);
+      begin
+        inherited Create;
+        typ:=ait_const;
+        consttype:=size;
+        sym:=nil;
+        endsym:=nil;
+        symofs:=0;
+        value:=_value;
+      end;
+{$endif avr}
+
 {$ifdef i8086}
     constructor tai_const.Create_seg_name(const name:string);
       begin
@@ -1899,10 +2065,10 @@ implementation
           aitconst_16bit,aitconst_16bit_unaligned :
             result:=2;
           aitconst_32bit,aitconst_darwin_dwarf_delta32,
-	  aitconst_32bit_unaligned:
+          aitconst_32bit_unaligned:
             result:=4;
           aitconst_64bit,aitconst_darwin_dwarf_delta64,
-	  aitconst_64bit_unaligned:
+          aitconst_64bit_unaligned:
             result:=8;
           aitconst_secrel32_symbol,
           aitconst_rva_symbol :
@@ -1911,9 +2077,31 @@ implementation
             else
               result:=sizeof(pint);
           aitconst_uleb128bit :
-            result:=LengthUleb128(qword(value));
+            begin
+              if fixed_size>0 then
+                result:=fixed_size
+              else if sym=nil then
+                begin
+                  FixSize;
+                  result:=fixed_size;
+                end
+              else
+                { worst case }
+                result:=sizeof(pint)+2;
+            end;
           aitconst_sleb128bit :
-            result:=LengthSleb128(value);
+            begin
+              if fixed_size>0 then
+                result:=fixed_size
+              else if sym=nil then
+                begin
+                  FixSize;
+                  result:=fixed_size;
+                end
+              else
+                { worst case }
+                result:=sizeof(pint)+2;
+            end;
           aitconst_half16bit,
           aitconst_gs:
             result:=2;
@@ -1927,12 +2115,35 @@ implementation
             result:=sizeof(pint);
           aitconst_gotoff_symbol:
             result:=4;
+          aitconst_gottpoff:
+            result:=4;
+          aitconst_tlsgd:
+            result:=4;
+          aitconst_tpoff:
+            result:=4;
+          aitconst_tlsdesc:
+            result:=4;
+          aitconst_dtpoff:
+            result:=4;
           else
             internalerror(200603253);
         end;
       end;
 
 
+    procedure tai_const.FixSize;
+      begin
+        case consttype of
+          aitconst_uleb128bit:
+            fixed_size:=LengthUleb128(qword(value));
+          aitconst_sleb128bit:
+            fixed_size:=LengthSleb128(value);
+          else
+            Internalerror(2019030301);
+        end;
+      end;
+
+
 {****************************************************************************
                                TAI_realconst
  ****************************************************************************}
@@ -2017,8 +2228,6 @@ implementation
             value.s128val:=ppufile.getreal;
           aitrealconst_s64comp:
             value.s64compval:=comp(ppufile.getint64);
-          else
-            internalerror(2014050602);
         end;
       end;
 
@@ -2046,8 +2255,6 @@ implementation
               c:=comp(value.s64compval);
               ppufile.putint64(int64(c));
             end
-          else
-            internalerror(2014050601);
         end;
       end;
 
@@ -2076,8 +2283,6 @@ implementation
             result:=10;
           aitrealconst_s128bit:
             result:=16;
-          else
-            internalerror(2014050603);
         end;
       end;
 
@@ -2420,7 +2625,7 @@ implementation
         inherited ppuload(t,ppufile);
         temppos:=ppufile.getlongint;
         tempsize:=ppufile.getlongint;
-        allocation:=boolean(ppufile.getbyte);
+        allocation:=ppufile.getboolean;
 {$ifdef EXTDEBUG}
         problem:=nil;
 {$endif EXTDEBUG}
@@ -2432,7 +2637,7 @@ implementation
         inherited ppuwrite(ppufile);
         ppufile.putlongint(temppos);
         ppufile.putlongint(tempsize);
-        ppufile.putbyte(byte(allocation));
+        ppufile.putboolean(allocation);
       end;
 
 
@@ -2500,7 +2705,7 @@ implementation
         inherited ppuload(t,ppufile);
         ppufile.getdata(reg,sizeof(Tregister));
         ratype:=tregalloctype(ppufile.getbyte);
-        keep:=boolean(ppufile.getbyte);
+        keep:=ppufile.getboolean;
       end;
 
 
@@ -2509,7 +2714,7 @@ implementation
         inherited ppuwrite(ppufile);
         ppufile.putdata(reg,sizeof(Tregister));
         ppufile.putbyte(byte(ratype));
-        ppufile.putbyte(byte(keep));
+        ppufile.putboolean(keep);
       end;
 
 
@@ -2553,7 +2758,7 @@ implementation
       end;
 
 
-    procedure tai_cpu_abstract.loadconst(opidx:longint;l:aint);
+    procedure tai_cpu_abstract.loadconst(opidx:longint;l:tcgint);
       begin
         allocate_oper(opidx+1);
         with oper[opidx]^ do
@@ -2596,6 +2801,9 @@ implementation
                localscale:=scale;
                localgetoffset:=getoffset;
                localforceref:=forceref;
+{$ifdef x86}
+               localsegment:=NR_NO;
+{$endif x86}
              end;
            typ:=top_local;
          end;
@@ -2603,6 +2811,10 @@ implementation
 
 
     procedure tai_cpu_abstract.loadref(opidx:longint;const r:treference);
+{$ifdef x86}
+      var
+        si_param: ShortInt;
+{$endif}
       begin
         allocate_oper(opidx+1);
         with oper[opidx]^ do
@@ -2617,7 +2829,17 @@ implementation
 {$ifdef x86}
             { We allow this exception for x86, since overloading this would be
               too much of a a speed penalty}
-            if (ref^.segment<>NR_NO) and (ref^.segment<>NR_DS) then
+            if is_x86_parameterized_string_op(opcode) then
+              begin
+                si_param:=get_x86_string_op_si_param(opcode);
+                if (si_param<>-1) and (taicpu(self).OperandOrder=op_att) then
+                  si_param:=x86_parameterized_string_op_param_count(opcode)-si_param-1;
+                if (si_param=opidx) and (ref^.segment<>NR_NO) and (ref^.segment<>NR_DS) then
+                  segprefix:=ref^.segment;
+              end
+            else if (opcode=A_XLAT) and (ref^.segment<>NR_NO) and (ref^.segment<>NR_DS) then
+              segprefix:=ref^.segment
+            else if (ref^.segment<>NR_NO) and (ref^.segment<>get_default_segment_of_ref(ref^)) then
               segprefix:=ref^.segment;
 {$endif}
 {$ifndef llvm}
@@ -2673,6 +2895,10 @@ implementation
 
 
     procedure tai_cpu_abstract.loadoper(opidx:longint;o:toper);
+{$ifdef x86}
+      var
+        si_param: ShortInt;
+{$endif x86}
       begin
         allocate_oper(opidx+1);
         clearop(opidx);
@@ -2691,7 +2917,19 @@ implementation
                   new(ref);
                   ref^:=o.ref^;
 {$ifdef x86}
-                  if (ref^.segment<>NR_NO) and (ref^.segment<>NR_DS) then
+                  { We allow this exception for x86, since overloading this would be
+                    too much of a a speed penalty}
+                  if is_x86_parameterized_string_op(opcode) then
+                    begin
+                      si_param:=get_x86_string_op_si_param(opcode);
+                      if (si_param<>-1) and (taicpu(self).OperandOrder=op_att) then
+                        si_param:=x86_parameterized_string_op_param_count(opcode)-si_param-1;
+                      if (si_param=opidx) and (ref^.segment<>NR_NO) and (ref^.segment<>NR_DS) then
+                        segprefix:=ref^.segment;
+                    end
+                  else if (opcode=A_XLAT) and (ref^.segment<>NR_NO) and (ref^.segment<>NR_DS) then
+                    segprefix:=ref^.segment
+                  else if (ref^.segment<>NR_NO) and (ref^.segment<>get_default_segment_of_ref(ref^)) then
                     segprefix:=ref^.segment;
 {$endif x86}
                   if assigned(add_reg_instruction_hook) then
@@ -2709,6 +2947,8 @@ implementation
                     add_reg_instruction_hook(self,shifterop^.rs);
                 end;
 {$endif ARM}
+              else
+                ;
              end;
           end;
       end;
@@ -2734,6 +2974,8 @@ implementation
               top_wstring:
                 donewidestring(pwstrval);
 {$endif jvm}
+              else
+                ;
             end;
             typ:=top_none;
           end;
@@ -2787,6 +3029,8 @@ implementation
                   p.oper[i]^.shifterop^:=oper[i]^.shifterop^;
                 end;
 {$endif ARM}
+              else
+                ;
             end;
           end;
         getcopy:=p;
@@ -2821,16 +3065,17 @@ implementation
         i : integer;
       begin
         inherited ppuload(t,ppufile);
-        { hopefully, we don't get problems with big/litte endian here when cross compiling :/ }
+        { hopefully, we don't get problems with big/little endian here when cross compiling :/ }
         ppufile.getdata(condition,sizeof(tasmcond));
-        allocate_oper(ppufile.getbyte);
+        ops := ppufile.getbyte;
+        allocate_oper(ops);
         for i:=0 to ops-1 do
           ppuloadoper(ppufile,oper[i]^);
         opcode:=tasmop(ppufile.getword);
 {$ifdef x86}
         ppufile.getdata(segprefix,sizeof(Tregister));
 {$endif x86}
-        is_jmp:=boolean(ppufile.getbyte);
+        is_jmp:=ppufile.getboolean;
       end;
 
 
@@ -2847,7 +3092,7 @@ implementation
 {$ifdef x86}
         ppufile.putdata(segprefix,sizeof(Tregister));
 {$endif x86}
-        ppufile.putbyte(byte(is_jmp));
+        ppufile.putboolean(is_jmp);
       end;
 
 
@@ -2920,6 +3165,9 @@ implementation
                 begin
                   ppufile.getderef(localsymderef);
                   localsymofs:=ppufile.getaint;
+{$ifdef x86}
+                  localsegment:=tregister(ppufile.getlongint);
+{$endif x86}
                   localindexreg:=tregister(ppufile.getlongint);
                   localscale:=ppufile.getbyte;
                   localgetoffset:=(ppufile.getbyte<>0);
@@ -2959,6 +3207,9 @@ implementation
                 begin
                   ppufile.putderef(localsymderef);
                   ppufile.putaint(localsymofs);
+{$ifdef x86}
+                  ppufile.putlongint(longint(localsegment));
+{$endif x86}
                   ppufile.putlongint(longint(localindexreg));
                   ppufile.putbyte(localscale);
                   ppufile.putbyte(byte(localgetoffset));
@@ -2988,6 +3239,7 @@ implementation
           fillsize:=0;
           fillop:=0;
           use_op:=false;
+          maxbytes:=aligntype;
        end;
 
 
@@ -3002,6 +3254,22 @@ implementation
           fillsize:=0;
           fillop:=_op;
           use_op:=true;
+          maxbytes:=aligntype;
+       end;
+
+
+     constructor tai_align_abstract.create_max(b : byte; max : byte);
+       begin
+          inherited Create;
+          typ:=ait_align;
+          if b in [1,2,4,8,16,32] then
+            aligntype := b
+          else
+            aligntype := 1;
+          maxbytes:=max;
+          fillsize:=0;
+          fillop:=0;
+          use_op:=false;
        end;
 
 
@@ -3016,6 +3284,7 @@ implementation
          use_op:=true;
          fillsize:=0;
          fillop:=0;
+         maxbytes:=aligntype;
        end;
 
 
@@ -3034,7 +3303,8 @@ implementation
         aligntype:=ppufile.getbyte;
         fillsize:=0;
         fillop:=ppufile.getbyte;
-        use_op:=boolean(ppufile.getbyte);
+        use_op:=ppufile.getboolean;
+        maxbytes:=ppufile.getbyte;
       end;
 
 
@@ -3043,7 +3313,8 @@ implementation
         inherited ppuwrite(ppufile);
         ppufile.putbyte(aligntype);
         ppufile.putbyte(fillop);
-        ppufile.putbyte(byte(use_op));
+        ppufile.putboolean(use_op);
+        ppufile.putbyte(maxbytes);
       end;
 
 
@@ -3064,7 +3335,9 @@ implementation
         sd_reg,        { pushreg }
         sd_regoffset,  { savereg }
         sd_regoffset,  { savexmm }
-        sd_none        { pushframe }
+        sd_none,       { pushframe }
+        sd_reg,        { pushnv }
+        sd_none        { savenv }
       );
 
     constructor tai_seh_directive.create(_kind:TAsmSehDirective);
@@ -3119,8 +3392,6 @@ implementation
               ppufile.getdata(data.reg,sizeof(TRegister));
               data.offset:=ppufile.getdword;
             end;
-        else
-          InternalError(2011091201);
         end;
       end;
 
@@ -3148,8 +3419,6 @@ implementation
               ppufile.putdata(data.reg,sizeof(TRegister));
               ppufile.putdword(data.offset);
             end;
-        else
-          InternalError(2011091202);
         end;
       end;
 
@@ -3157,6 +3426,50 @@ implementation
       begin
       end;
 
+
+{****************************************************************************
+                              tai_eabi_attribute
+ ****************************************************************************}
+
+    constructor tai_eabi_attribute.create(atag,avalue : dword);
+      begin
+        inherited Create;
+        typ:=ait_eabi_attribute;
+        eattr_typ:=eattrtype_dword;
+        tag:=atag;
+        value:=avalue;
+      end;
+
+
+    constructor tai_eabi_attribute.create(atag: dword; const avalue: string);
+      begin
+        inherited Create;
+        typ:=ait_eabi_attribute;
+        eattr_typ:=eattrtype_ntbs;
+        tag:=atag;
+        valuestr:=NewStr(avalue);
+      end;
+
+
+    destructor tai_eabi_attribute.destroy;
+      begin
+        Inherited Destroy;
+      end;
+
+
+    constructor tai_eabi_attribute.ppuload(t:taitype;ppufile:tcompilerppufile);
+      begin
+      end;
+
+
+    procedure tai_eabi_attribute.ppuwrite(ppufile:tcompilerppufile);
+      begin
+        inherited ppuwrite(ppufile);
+        ppufile.putdword(tag);
+        ppufile.putdword(value);
+      end;
+
+
 {$ifdef JVM}
 
 {****************************************************************************

+ 322 - 89
compiler/aggas.pas

@@ -1,4 +1,4 @@
-{
+  {
     Copyright (c) 1998-2006 by the Free Pascal team
 
     This unit implements the generic part of the GNU assembler
@@ -32,7 +32,7 @@ interface
 
     uses
       globtype,globals,
-      aasmbase,aasmtai,aasmdata,
+      aasmbase,aasmtai,aasmdata,aasmcfi,
       assemble;
 
     type
@@ -48,12 +48,14 @@ interface
         function sectionname(atype:TAsmSectiontype;const aname:string;aorder:TAsmSectionOrder):string;virtual;
         function sectionattrs(atype:TAsmSectiontype):string;virtual;
         function sectionattrs_coff(atype:TAsmSectiontype):string;virtual;
-        function sectionalignment_aix(atype:TAsmSectiontype;secalign: byte):string;
-        procedure WriteSection(atype:TAsmSectiontype;const aname:string;aorder:TAsmSectionOrder;secalign:byte);
+        function sectionalignment_aix(atype:TAsmSectiontype;secalign: longint):string;
+        procedure WriteSection(atype:TAsmSectiontype;const aname:string;aorder:TAsmSectionOrder;secalign:longint;
+          secflags:TSectionFlags=[];secprogbits:TSectionProgbits=SPB_None);virtual;
         procedure WriteExtraHeader;virtual;
         procedure WriteExtraFooter;virtual;
         procedure WriteInstruction(hp: tai);
         procedure WriteWeakSymbolRef(s: tasmsymbol); virtual;
+        procedure WriteHiddenSymbol(sym: TAsmSymbol);
         procedure WriteAixStringConst(hp: tai_string);
         procedure WriteAixIntConst(hp: tai_const);
         procedure WriteUnalignedIntConst(hp: tai_const);
@@ -67,6 +69,7 @@ interface
         setcount: longint;
         procedure WriteDecodedSleb128(a: int64);
         procedure WriteDecodedUleb128(a: qword);
+        procedure WriteCFI(hp: tai_cfi_base);
         function NextSetLabel: string;
        protected
         InstrWriter: TCPUInstrWriter;
@@ -211,11 +214,11 @@ implementation
 { vtable for a class called Window:                                       }
 { .section .data.rel.ro._ZTV6Window,"awG",@progbits,_ZTV6Window,comdat    }
 { TODO: .data.ro not yet working}
-{$if defined(arm) or defined(powerpc)}
+{$if defined(arm) or defined(riscv64) or defined(powerpc)}
           '.rodata',
-{$else arm}
+{$else defined(arm) or defined(riscv64) or defined(powerpc)}
           '.data',
-{$endif arm}
+{$endif defined(arm) or defined(riscv64) or defined(powerpc)}
           '.rodata',
           '.bss',
           '.threadvar',
@@ -269,7 +272,9 @@ implementation
           '.obcj_nlcatlist',
           '.objc_protolist',
           '.stack',
-          '.heap'
+          '.heap',
+          '.gcc_except_table',
+          '.ARM.attributes'
         );
         secnames_pic : array[TAsmSectiontype] of string[length('__DATA, __datacoal_nt,coalesced')] = ('','',
           '.text',
@@ -328,7 +333,9 @@ implementation
           '.obcj_nlcatlist',
           '.objc_protolist',
           '.stack',
-          '.heap'
+          '.heap',
+          '.gcc_except_table',
+          '..ARM.attributes'
         );
       var
         sep     : string[3];
@@ -346,15 +353,19 @@ implementation
             exit;
           end;
 
-        if (atype=sec_threadvar) and
-          (target_info.system in (systems_windows+systems_wince)) then
-          secname:='.tls';
+        if atype=sec_threadvar then
+          begin
+            if (target_info.system in (systems_windows+systems_wince)) then
+              secname:='.tls'
+            else if (target_info.system in systems_linux) then
+              secname:='.tbss';
+          end;
 
         { go32v2 stub only loads .text and .data sections, and allocates space for .bss.
           Thus, data which normally goes into .rodata and .rodata_norel sections must
           end up in .data section }
         if (atype in [sec_rodata,sec_rodata_norel]) and
-          (target_info.system=system_i386_go32v2) then
+          (target_info.system in [system_i386_go32v2,system_m68k_palmos]) then
           secname:='.data';
 
         { Windows correctly handles reallocations in readonly sections }
@@ -362,9 +373,18 @@ implementation
           (target_info.system in systems_all_windows+systems_nativent-[system_i8086_win16]) then
           secname:='.rodata';
 
-        { Use .rodata for Android }
-        if (target_info.system in systems_android) and (atype in [sec_rodata,sec_rodata_norel]) then
-          secname:='.rodata';
+        { Use .rodata and .data.rel.ro for Android with PIC }
+        if (target_info.system in systems_android) and (cs_create_pic in current_settings.moduleswitches) then
+          begin
+            case atype of
+              sec_rodata:
+                secname:='.data.rel.ro';
+              sec_rodata_norel:
+                secname:='.rodata';
+              else
+                ;
+            end;
+          end;
 
         { section type user gives the user full controll on the section name }
         if atype=sec_user then
@@ -434,7 +454,7 @@ implementation
       end;
 
 
-    function TGNUAssembler.sectionalignment_aix(atype:TAsmSectiontype;secalign: byte): string;
+    function TGNUAssembler.sectionalignment_aix(atype:TAsmSectiontype;secalign: longint): string;
       var
         l: longint;
       begin
@@ -450,11 +470,16 @@ implementation
       end;
 
 
-    procedure TGNUAssembler.WriteSection(atype:TAsmSectiontype;const aname:string;aorder:TAsmSectionOrder;secalign:byte);
+    procedure TGNUAssembler.WriteSection(atype:TAsmSectiontype;const aname:string;aorder:TAsmSectionOrder;secalign:longint;secflags:TSectionFlags=[];secprogbits:TSectionProgbits=SPB_None);
       var
         s : string;
+        secflag: TSectionFlag;
+        sectionprogbits,
+        sectionflags: boolean;
       begin
         writer.AsmLn;
+        sectionflags:=false;
+        sectionprogbits:=false;
         case target_info.system of
          system_i386_OS2,
          system_i386_EMX: ;
@@ -463,7 +488,21 @@ implementation
            begin
              { ... but vasm is GAS compatible on amiga/atari, and supports named sections }
              if create_smartlink_sections then
-               writer.AsmWrite('.section ');
+               begin
+                 writer.AsmWrite('.section ');
+                 sectionflags:=true;
+                 sectionprogbits:=true;
+               end;
+           end;
+         system_i386_win32,
+         system_x86_64_win64,
+         system_i386_wince,
+         system_arm_wince:
+           begin
+             { according to the GNU AS guide AS for COFF does not support the
+               progbits }
+             writer.AsmWrite('.section ');
+             sectionflags:=true;
            end;
          system_powerpc_darwin,
          system_i386_darwin,
@@ -480,60 +519,107 @@ implementation
                writer.AsmWrite('.section ');
            end
          else
-          writer.AsmWrite('.section ');
+           begin
+             writer.AsmWrite('.section ');
+             { sectionname may rename those sections, so we do not write flags/progbits for them,
+               the assembler will ignore them/spite out a warning anyways }
+             if not(atype in [sec_data,sec_rodata,sec_rodata_norel]) then
+               begin
+                 sectionflags:=true;
+                 sectionprogbits:=true;
+               end;
+           end
         end;
         s:=sectionname(atype,aname,aorder);
         writer.AsmWrite(s);
-        case atype of
-          sec_fpc :
-            if aname = 'resptrs' then
-              writer.AsmWrite(', "a", @progbits');
-          sec_stub :
-            begin
-              case target_info.system of
-                { there are processor-independent shortcuts available    }
-                { for this, namely .symbol_stub and .picsymbol_stub, but }
-                { they don't work and gcc doesn't use them either...     }
-                system_powerpc_darwin,
-                system_powerpc64_darwin:
-                  if (cs_create_pic in current_settings.moduleswitches) then
-                    writer.AsmWriteln('__TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32')
-                  else
-                    writer.AsmWriteln('__TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16');
-                system_i386_darwin,
-                system_i386_iphonesim:
-                  writer.AsmWriteln('__IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5');
-                system_arm_darwin:
-                  if (cs_create_pic in current_settings.moduleswitches) then
-                    writer.AsmWriteln('__TEXT,__picsymbolstub4,symbol_stubs,none,16')
+        { flags explicitly defined? }
+        if (sectionflags or sectionprogbits) and
+           ((secflags<>[]) or
+            (secprogbits<>SPB_None)) then
+          begin
+            if sectionflags then
+              begin
+                s:=',"';
+                for secflag in secflags do
+                  case secflag of
+                    SF_A:
+                      s:=s+'a';
+                    SF_W:
+                      s:=s+'w';
+                    SF_X:
+                      s:=s+'x';
+                  end;
+                writer.AsmWrite(s+'"');
+              end;
+            if sectionprogbits then
+              begin
+                case secprogbits of
+                  SPB_PROGBITS:
+                    writer.AsmWrite(',%progbits');
+                  SPB_NOBITS:
+                    writer.AsmWrite(',%nobits');
+                  SPB_NOTE:
+                    writer.AsmWrite(',%note');
+                  SPB_None:
+                    ;
                   else
-                    writer.AsmWriteln('__TEXT,__symbol_stub4,symbol_stubs,none,12')
-                { darwin/(x86-64/AArch64) uses PC-based GOT addressing, no
-                  explicit symbol stubs }
-                else
-                  internalerror(2006031101);
+                    InternalError(2019100801);
+                end;
               end;
-            end;
+          end
         else
-          { GNU AS won't recognize '.text.n_something' section name as belonging
-            to '.text' and assigns default attributes to it, which is not
-            always correct. We have to fix it.
+          case atype of
+            sec_fpc :
+              if aname = 'resptrs' then
+                writer.AsmWrite(', "a", @progbits');
+            sec_stub :
+              begin
+                case target_info.system of
+                  { there are processor-independent shortcuts available    }
+                  { for this, namely .symbol_stub and .picsymbol_stub, but }
+                  { they don't work and gcc doesn't use them either...     }
+                  system_powerpc_darwin,
+                  system_powerpc64_darwin:
+                    if (cs_create_pic in current_settings.moduleswitches) then
+                      writer.AsmWriteln('__TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32')
+                    else
+                      writer.AsmWriteln('__TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16');
+                  system_i386_darwin,
+                  system_i386_iphonesim:
+                    writer.AsmWriteln('__IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5');
+                  system_arm_darwin:
+                    if (cs_create_pic in current_settings.moduleswitches) then
+                      writer.AsmWriteln('__TEXT,__picsymbolstub4,symbol_stubs,none,16')
+                    else
+                      writer.AsmWriteln('__TEXT,__symbol_stub4,symbol_stubs,none,12')
+                  { darwin/(x86-64/AArch64) uses PC-based GOT addressing, no
+                    explicit symbol stubs }
+                  else
+                    internalerror(2006031101);
+                end;
+              end;
+          else
+            { GNU AS won't recognize '.text.n_something' section name as belonging
+              to '.text' and assigns default attributes to it, which is not
+              always correct. We have to fix it.
 
-            TODO: This likely applies to all systems which smartlink without
-            creating libraries }
-          if is_smart_section(atype) and (aname<>'') then
+              TODO: This likely applies to all systems which smartlink without
+              creating libraries }
             begin
-              s:=sectionattrs(atype);
-              if (s<>'') then
-                writer.AsmWrite(',"'+s+'"');
-            end
-         else if target_info.system in systems_aix then
-           begin
-             s:=sectionalignment_aix(atype,secalign);
-             if s<>'' then
-               writer.AsmWrite(','+s);
-           end;
-        end;
+              if is_smart_section(atype) and (aname<>'') then
+                begin
+                  s:=sectionattrs(atype);
+                  if (s<>'') then
+                    writer.AsmWrite(',"'+s+'"');
+                end;
+              if target_info.system in systems_aix then
+                begin
+                  s:=sectionalignment_aix(atype,secalign);
+                  if s<>'' then
+                    writer.AsmWrite(','+s);
+                end;
+            end;
+          end;
         writer.AsmLn;
         LastSecType:=atype;
       end;
@@ -544,7 +630,7 @@ implementation
         i,len : longint;
         buf   : array[0..63] of byte;
       begin
-        len:=EncodeUleb128(a,buf);
+        len:=EncodeUleb128(a,buf,0);
         for i:=0 to len-1 do
           begin
             if (i > 0) then
@@ -554,12 +640,45 @@ implementation
       end;
 
 
+    procedure TGNUAssembler.WriteCFI(hp: tai_cfi_base);
+      begin
+        writer.AsmWrite(cfi2str[hp.cfityp]);
+        case hp.cfityp of
+          cfi_startproc,
+          cfi_endproc:
+            ;
+          cfi_undefined,
+          cfi_restore,
+          cfi_def_cfa_register:
+            begin
+              writer.AsmWrite(' ');
+              writer.AsmWrite(gas_regname(tai_cfi_op_reg(hp).reg1));
+            end;
+          cfi_def_cfa_offset:
+            begin
+              writer.AsmWrite(' ');
+              writer.AsmWrite(tostr(tai_cfi_op_val(hp).val1));
+            end;
+          cfi_offset:
+            begin
+              writer.AsmWrite(' ');
+              writer.AsmWrite(gas_regname(tai_cfi_op_reg_val(hp).reg1));
+              writer.AsmWrite(',');
+              writer.AsmWrite(tostr(tai_cfi_op_reg_val(hp).val));
+            end;
+          else
+            internalerror(2019030203);
+        end;
+        writer.AsmLn;
+      end;
+
+
     procedure TGNUAssembler.WriteDecodedSleb128(a: int64);
       var
         i,len : longint;
         buf   : array[0..255] of byte;
       begin
-        len:=EncodeSleb128(a,buf);
+        len:=EncodeSleb128(a,buf,0);
         for i:=0 to len-1 do
           begin
             if (i > 0) then
@@ -583,9 +702,10 @@ implementation
         end;
 
 
-      procedure doalign(alignment: byte; use_op: boolean; fillop: byte; out last_align: longint;lasthp:tai);
+      procedure doalign(alignment: byte; use_op: boolean; fillop: byte; maxbytes: byte; out last_align: longint;lasthp:tai);
         var
           i: longint;
+          alignment64 : int64;
 {$ifdef m68k}
           instr : string;
 {$endif}
@@ -612,14 +732,33 @@ implementation
                   else
                     begin
 {$endif m68k}
-                  writer.AsmWrite(#9'.balign '+tostr(alignment));
-                  if use_op then
-                    writer.AsmWrite(','+tostr(fillop))
+                      alignment64:=alignment;
+                      if (maxbytes<>alignment) and ispowerof2(alignment64,i) then
+                        begin
+                          if use_op then
+                            begin
+                              writer.AsmWrite(#9'.p2align '+tostr(i)+','+tostr(fillop)+','+tostr(maxbytes));
+                              writer.AsmLn;
+                              writer.AsmWrite(#9'.p2align '+tostr(i-1)+','+tostr(fillop));
+                            end
+                          else
+                            begin
+                              writer.AsmWrite(#9'.p2align '+tostr(i)+',,'+tostr(maxbytes));
+                              writer.AsmLn;
+                              writer.AsmWrite(#9'.p2align '+tostr(i-1));
+                            end
+                        end
+                      else
+                        begin
+                          writer.AsmWrite(#9'.balign '+tostr(alignment));
+                          if use_op then
+                            writer.AsmWrite(','+tostr(fillop))
 {$ifdef x86}
-                  { force NOP as alignment op code }
-                  else if (LastSecType=sec_code) and (asminfo^.id<>as_solaris_as) then
-                    writer.AsmWrite(',0x90');
+                          { force NOP as alignment op code }
+                          else if (LastSecType=sec_code) and (asminfo^.id<>as_solaris_as) then
+                            writer.AsmWrite(',0x90');
 {$endif x86}
+                        end;
 {$ifdef m68k}
                     end;
 {$endif m68k}
@@ -709,16 +848,18 @@ implementation
 
            ait_align :
              begin
-               doalign(tai_align_abstract(hp).aligntype,tai_align_abstract(hp).use_op,tai_align_abstract(hp).fillop,last_align,lasthp);
+               doalign(tai_align_abstract(hp).aligntype,tai_align_abstract(hp).use_op,tai_align_abstract(hp).fillop,tai_align_abstract(hp).maxbytes,last_align,lasthp);
              end;
 
            ait_section :
              begin
                if tai_section(hp).sectype<>sec_none then
                  if replaceforbidden then
-                   WriteSection(tai_section(hp).sectype,ReplaceForbiddenAsmSymbolChars(tai_section(hp).name^),tai_section(hp).secorder,tai_section(hp).secalign)
+                   WriteSection(tai_section(hp).sectype,ReplaceForbiddenAsmSymbolChars(tai_section(hp).name^),tai_section(hp).secorder,
+                     tai_section(hp).secalign,tai_section(hp).secflags,tai_section(hp).secprogbits)
                  else
-                   WriteSection(tai_section(hp).sectype,tai_section(hp).name^,tai_section(hp).secorder,tai_section(hp).secalign)
+                   WriteSection(tai_section(hp).sectype,tai_section(hp).name^,tai_section(hp).secorder,
+                     tai_section(hp).secalign,tai_section(hp).secflags,tai_section(hp).secprogbits)
                else
                  begin
 {$ifdef EXTDEBUG}
@@ -738,8 +879,11 @@ implementation
                      processes). The alternate code creates some kind of common symbols
                      in the data segment.
                    }
+
                    if tai_datablock(hp).is_global then
                      begin
+                       if tai_datablock(hp).sym.bind=AB_PRIVATE_EXTERN then
+                         WriteHiddenSymbol(tai_datablock(hp).sym);
                        writer.AsmWrite('.globl ');
                        writer.AsmWriteln(tai_datablock(hp).sym.name);
                        writer.AsmWriteln('.data');
@@ -818,6 +962,8 @@ implementation
                      begin
                        if Tai_datablock(hp).is_global then
                          begin
+                           if (tai_datablock(hp).sym.bind=AB_PRIVATE_EXTERN) then
+                             WriteHiddenSymbol(tai_datablock(hp).sym);
                            writer.AsmWrite(#9'.globl ');
                            if replaceforbidden then
                              writer.AsmWriteln(ReplaceForbiddenAsmSymbolChars(Tai_datablock(hp).sym.name))
@@ -884,7 +1030,44 @@ implementation
                         WriteAixIntConst(tai_const(hp));
                       writer.AsmLn;
                     end;
+                 aitconst_gottpoff:
+                   begin
+                     writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(gottpoff)+(.-'+tai_const(hp).endsym.name+tostr_with_plus(tai_const(hp).symofs)+')');
+                     writer.Asmln;
+                   end;
+                 aitconst_tlsgd:
+                   begin
+                     writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(tlsgd)+(.-'+tai_const(hp).endsym.name+tostr_with_plus(tai_const(hp).symofs)+')');
+                     writer.Asmln;
+                   end;
+                 aitconst_tlsdesc:
+                   begin
+                     writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(tlsdesc)+(.-'+tai_const(hp).endsym.name+tostr_with_plus(tai_const(hp).symofs)+')');
+                     writer.Asmln;
+                   end;
+                 aitconst_tpoff:
+                   begin
+                     if assigned(tai_const(hp).endsym) or (tai_const(hp).symofs<>0) then
+                       Internalerror(2019092805);
+                     writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(tpoff)');
+                     writer.Asmln;
+                   end;
 {$endif cpu64bitaddr}
+                 aitconst_dtpoff:
+                   begin
+{$ifdef arm}
+                     writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(tlsldo)');
+                     writer.Asmln;
+{$endif arm}
+{$ifdef x86_64}
+                     writer.AsmWrite(#9'.long'#9+tai_const(hp).sym.name+'@dtpoff');
+                     writer.Asmln;
+{$endif x86_64}
+{$ifdef i386}
+                     writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'@tdpoff');
+                     writer.Asmln;
+{$endif i386}
+                   end;
                  aitconst_got:
                    begin
                      if tai_const(hp).symofs<>0 then
@@ -958,6 +1141,8 @@ implementation
                              WriteDecodedUleb128(qword(tai_const(hp).value));
                            aitconst_sleb128bit:
                              WriteDecodedSleb128(int64(tai_const(hp).value));
+                           else
+                             ;
                          end
                        end
                      else
@@ -1111,14 +1296,6 @@ implementation
 
            ait_symbol :
              begin
-               if (tai_symbol(hp).sym.bind=AB_PRIVATE_EXTERN) then
-                 begin
-                   writer.AsmWrite(#9'.private_extern ');
-                   if replaceforbidden then
-                     writer.AsmWriteln(ReplaceForbiddenAsmSymbolChars(tai_symbol(hp).sym.name))
-                   else
-                     writer.AsmWriteln(tai_symbol(hp).sym.name);
-                 end;
                if (target_info.system=system_powerpc64_linux) and
                   (tai_symbol(hp).sym.typ=AT_FUNCTION) and
                   (cs_profile in current_settings.moduleswitches) then
@@ -1131,6 +1308,8 @@ implementation
                     writer.AsmWriteln(ReplaceForbiddenAsmSymbolChars(tai_symbol(hp).sym.name))
                   else
                     writer.AsmWriteln(tai_symbol(hp).sym.name);
+                  if (tai_symbol(hp).sym.bind=AB_PRIVATE_EXTERN) then
+                    WriteHiddenSymbol(tai_symbol(hp).sym);
                 end;
                if (target_info.system=system_powerpc64_linux) and
                   use_dotted_functions and
@@ -1211,14 +1390,26 @@ implementation
                if replaceforbidden then
                  begin
                    { avoid string truncation }
-                   writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_symbolpair(hp).sym^)+s);
+                   writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_symbolpair(hp).sym^));
+                   writer.AsmWrite(s);
                    writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_symbolpair(hp).value^));
+                   if tai_symbolpair(hp).kind=spk_set_global then
+                     begin
+                       writer.AsmWrite(#9'.globl ');
+                       writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_symbolpair(hp).sym^));
+                     end;
                  end
                else
                  begin
                    { avoid string truncation }
-                   writer.AsmWrite(tai_symbolpair(hp).sym^+s);
+                   writer.AsmWrite(tai_symbolpair(hp).sym^);
+                   writer.AsmWrite(s);
                    writer.AsmWriteLn(tai_symbolpair(hp).value^);
+                   if tai_symbolpair(hp).kind=spk_set_global then
+                     begin
+                       writer.AsmWrite(#9'.globl ');
+                       writer.AsmWriteLn(tai_symbolpair(hp).sym^);
+                     end;
                  end;
              end;
            ait_symbol_end :
@@ -1361,6 +1552,22 @@ implementation
                    std_regname(tai_varloc(hp).newlocation)));
                writer.AsmLn;
              end;
+           ait_cfi:
+             begin
+               WriteCFI(tai_cfi_base(hp));
+             end;
+           ait_eabi_attribute:
+             begin
+               case tai_eabi_attribute(hp).eattr_typ of
+                 eattrtype_dword:
+                   writer.AsmWrite(#9'.eabi_attribute '+tostr(tai_eabi_attribute(hp).tag)+','+tostr(tai_eabi_attribute(hp).value));
+                 eattrtype_ntbs:
+                   writer.AsmWrite(#9'.eabi_attribute '+tostr(tai_eabi_attribute(hp).tag)+',"'+tai_eabi_attribute(hp).valuestr^+'"');
+                 else
+                   Internalerror(2019100601);
+               end;
+               writer.AsmLn;
+             end;
            else
              internalerror(2006012201);
          end;
@@ -1388,7 +1595,29 @@ implementation
 
     procedure TGNUAssembler.WriteWeakSymbolRef(s: tasmsymbol);
       begin
-        writer.AsmWriteLn(#9'.weak '+s.name);
+        writer.AsmWrite(#9'.weak ');
+        if asminfo^.dollarsign='$' then
+          writer.AsmWriteLn(s.name)
+        else
+          writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(s.name))
+      end;
+
+
+    procedure TGNUAssembler.WriteHiddenSymbol(sym: TAsmSymbol);
+      begin
+        { on Windows/(PE)COFF, global symbols are hidden by default: global
+          symbols that are not explicitly exported from an executable/library,
+          become hidden }
+        if target_info.system in systems_windows then
+          exit;
+        if target_info.system in systems_darwin then
+          writer.AsmWrite(#9'.private_extern ')
+        else
+          writer.AsmWrite(#9'.hidden ');
+        if asminfo^.dollarsign='$' then
+          writer.AsmWriteLn(sym.name)
+        else
+          writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(sym.name))
       end;
 
 
@@ -1706,6 +1935,8 @@ implementation
                 result:='.section '+objc_section_name(atype);
                 exit
               end;
+            else
+              ;
           end;
         result := inherited sectionname(atype,aname,aorder);
       end;
@@ -1812,7 +2043,9 @@ implementation
          sec_none (* sec_objc_nlcatlist *),
          sec_none (* sec_objc_protlist *),
          sec_none (* sec_stack *),
-         sec_none (* sec_heap *)
+         sec_none (* sec_heap *),
+         sec_none (* gcc_except_table *),
+         sec_none (* sec_arm_attribute *)
         );
       begin
         Result := inherited SectionName (SecXTable [AType], AName, AOrder);

+ 22 - 5
compiler/aopt.pas

@@ -26,6 +26,7 @@ Unit aopt;
 {$i fpcdefs.inc}
 
 { $define DEBUG_OPTALLOC}
+{ $define DEBUG_INSTRUCTIONREGISTERDEPENDENCIES}
 
   Interface
 
@@ -35,6 +36,9 @@ Unit aopt;
 
     Type
       TAsmOptimizer = class(TAoptObj)
+        { Pooled object that can be used by optimisation procedures to evaluate
+          future register usage without upsetting the current state. }
+        TmpUsedRegs: TAllUsedRegs;
 
         { _AsmL is the PAasmOutpout list that has to be optimized }
         Constructor create(_AsmL: TAsmList); virtual; reintroduce;
@@ -49,9 +53,9 @@ Unit aopt;
         { Builds a table with the locations of the labels in the TAsmList.
           Also fixes some RegDeallocs like "# %eax released; push (%eax)"  }
         Procedure BuildLabelTableAndFixRegAlloc;
-        procedure clear;
       protected
         procedure pass_1;
+        procedure clear;
       End;
       TAsmOptimizerClass = class of TAsmOptimizer;
 
@@ -75,17 +79,19 @@ Unit aopt;
 
     uses
       cutils,
+      cprofile,
       globtype, globals,
       verbose,
       cpubase,
       cgbase,
-      aoptda,aoptcpu,aoptcpud;
+      aoptcpu;
 
     Constructor TAsmOptimizer.create(_AsmL: TAsmList);
       Begin
         inherited create(_asml,nil,nil,nil);
         { setup labeltable, always necessary }
         New(LabelInfo);
+        CreateUsedRegs(TmpUsedRegs);
       End;
 
     procedure TAsmOptimizer.FindLoHiLabels;
@@ -142,6 +148,7 @@ Unit aopt;
           p := BlockStart;
           While (P <> BlockEnd) Do
             Begin
+              prefetch(pointer(p.Next)^);
               Case p.typ Of
                 ait_Label:
                   begin
@@ -185,7 +192,6 @@ Unit aopt;
                       End
                     else if tai_regalloc(p).ratype=ra_dealloc then
                       Begin
-                        ExcludeRegFromUsedRegs(tai_regalloc(p).Reg,Regs);
                         hp1 := p;
                         hp2 := nil;
                         While Not(assigned(FindRegAlloc(tai_regalloc(p).Reg, tai(hp1.Next)))) And
@@ -226,9 +232,13 @@ Unit aopt;
                             AsmL.remove(p);
                             p.free;
                             p := hp1;
-                          end;
+                          end
+                        else
+                          ExcludeRegFromUsedRegs(tai_regalloc(p).Reg,Regs);
                       End
                   End
+                else
+                  ;
               End;
               P := tai(p.Next);
               While Assigned(p) and
@@ -317,6 +327,7 @@ Unit aopt;
 
     Destructor TAsmOptimizer.Destroy;
       Begin
+        ReleaseUsedRegs(TmpUsedRegs);
         if assigned(LabelInfo^.LabelTable) then
           Freemem(LabelInfo^.LabelTable);
         Dispose(LabelInfo);
@@ -337,6 +348,7 @@ Unit aopt;
         p:=BlockStart;
         while p<>BlockEnd Do
           begin
+            prefetch(pointer(p.Next)^);
             if SchedulerPass1Cpu(p) then
               continue;
             p:=tai(p.next);
@@ -379,9 +391,14 @@ Unit aopt;
       var
         p : TAsmOptimizer;
       begin
+        ResumeTimer(ct_aopt);
         p:=casmoptimizer.Create(AsmL);
         p.Optimize;
-        p.free
+{$ifdef DEBUG_INSTRUCTIONREGISTERDEPENDENCIES}
+        p.Debug_InsertInstrRegisterDependencyInfo;
+{$endif DEBUG_INSTRUCTIONREGISTERDEPENDENCIES}
+        p.free;
+        StopTimer;
       end;
 
 

+ 49 - 25
compiler/aoptbase.pas

@@ -49,9 +49,9 @@ unit aoptbase;
         { returns true if register Reg is used by instruction p1 }
         Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;virtual;
         { returns true if register Reg occurs in operand op }
-        Function RegInOp(Reg: TRegister; const op: toper): Boolean;
+        class function RegInOp(Reg: TRegister; const op: toper): Boolean; static;
         { returns true if register Reg is used in the reference Ref }
-        Function RegInRef(Reg: TRegister; Const Ref: TReference): Boolean;
+        class function RegInRef(Reg: TRegister; Const Ref: TReference): Boolean; static;
 
         function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;virtual;
 
@@ -61,13 +61,13 @@ unit aoptbase;
         { gets the next tai object after current that contains info relevant }
         { to the optimizer in p1. If there is none, it returns false and     }
         { sets p1 to nil                                                     }
-        class Function GetNextInstruction(Current: tai; Var Next: tai): Boolean;
-        { gets the previous tai object after current that contains info  }
-        { relevant to the optimizer in last. If there is none, it retuns }
-        { false and sets last to nil                                     }
-        Function GetLastInstruction(Current: tai; Var Last: tai): Boolean;
+        class function GetNextInstruction(Current: tai; out Next: tai): Boolean; static;
+        { gets the previous tai object after current that contains info   }
+        { relevant to the optimizer in last. If there is none, it returns }
+        { false and sets last to nil                                      }
+        class function GetLastInstruction(Current: tai; out Last: tai): Boolean; static;
 
-        function SkipEntryExitMarker(current: tai; var next: tai): boolean;
+        class function SkipEntryExitMarker(current: tai; out next: tai): boolean; static;
 
         { processor dependent methods }
 
@@ -104,15 +104,15 @@ unit aoptbase;
 
         { compares reg1 and reg2 having the same type and being the same super registers
           so the register size is neglected }
-        function SuperRegistersEqual(reg1,reg2 : TRegister) : Boolean;
+        class function SuperRegistersEqual(reg1,reg2 : TRegister) : Boolean; static; {$ifdef USEINLINE}inline;{$endif}
     end;
 
-    function labelCanBeSkipped(p: tai_label): boolean;
+    function labelCanBeSkipped(p: tai_label): boolean; {$ifdef USEINLINE}inline;{$endif}
 
   implementation
 
     uses
-      verbose,globtype,globals,aoptcpub;
+      verbose,globals,aoptcpub;
 
   constructor taoptbase.create;
     begin
@@ -140,7 +140,7 @@ unit aoptbase;
     End;
 
 
-  Function TAOptBase.RegInOp(Reg: TRegister; const op: toper): Boolean;
+  class function TAOptBase.RegInOp(Reg: TRegister; const op: toper): Boolean;
     Begin
       Case op.typ Of
         Top_Reg: RegInOp := SuperRegistersEqual(Reg,op.reg);
@@ -154,12 +154,20 @@ unit aoptbase;
     End;
 
 
-  Function TAOptBase.RegInRef(Reg: TRegister; Const Ref: TReference): Boolean;
+  class function TAOptBase.RegInRef(Reg: TRegister; Const Ref: TReference): Boolean;
   Begin
     RegInRef := SuperRegistersEqual(Ref.Base,Reg)
 {$ifdef cpurefshaveindexreg}
     Or SuperRegistersEqual(Ref.Index,Reg)
 {$endif cpurefshaveindexreg}
+{$ifdef x86}
+    or (Reg=Ref.segment)
+    { if Ref.segment isn't set, the cpu uses implicitly ss or ds, depending on the base register }
+    or ((Ref.segment=NR_NO) and (
+      ((Reg=NR_SS) and (SuperRegistersEqual(Ref.base,NR_EBP) or SuperRegistersEqual(Ref.base,NR_ESP))) or
+      ((Reg=NR_DS) and not(SuperRegistersEqual(Ref.base,NR_EBP) or SuperRegistersEqual(Ref.base,NR_ESP)))
+    ))
+{$endif x86}
   End;
 
   Function TAOptBase.RegModifiedByInstruction(Reg: TRegister; p1: tai): Boolean;
@@ -168,26 +176,31 @@ unit aoptbase;
   End;
 
 
-  function labelCanBeSkipped(p: tai_label): boolean;
+  function labelCanBeSkipped(p: tai_label): boolean; {$ifdef USEINLINE}inline;{$endif}
   begin
     labelCanBeSkipped := not(p.labsym.is_used) or (p.labsym.labeltype<>alt_jump);
   end;
 
 
-  class Function TAOptBase.GetNextInstruction(Current: tai; Var Next: tai): Boolean;
+  class function TAOptBase.GetNextInstruction(Current: tai; out Next: tai): Boolean;
   Begin
     Repeat
       Current := tai(Current.Next);
       While Assigned(Current) And
             ((Current.typ In SkipInstr) or
-{$if defined(SPARC) or defined(MIPS)}
+{$ifdef cpudelayslot}
              ((Current.typ=ait_instruction) and
               (taicpu(Current).opcode=A_NOP)
              ) or
-{$endif SPARC or MIPS}
+{$endif cpudelayslot}
              ((Current.typ = ait_label) And
               labelCanBeSkipped(Tai_Label(Current)))) Do
-        Current := tai(Current.Next);
+        begin
+          { this won't help the current loop, but it helps when returning from GetNextInstruction
+            as the next entry is probably already in the cache }
+          prefetch(pointer(Current.Next)^);
+          Current := Tai(Current.Next);
+        end;
       If Assigned(Current) And
          (Current.typ = ait_Marker) And
          (Tai_Marker(Current).Kind = mark_NoPropInfoStart) Then
@@ -195,7 +208,12 @@ unit aoptbase;
           While Assigned(Current) And
                 ((Current.typ <> ait_Marker) Or
                  (Tai_Marker(Current).Kind <> mark_NoPropInfoEnd)) Do
-            Current := Tai(Current.Next);
+            begin
+              { this won't help the current loop, but it helps when returning from GetNextInstruction
+                as the next entry is probably already in the cache }
+              prefetch(pointer(Current.Next)^);
+              Current := Tai(Current.Next);
+            end;
         End;
     Until Not(Assigned(Current)) Or
           (Current.typ <> ait_Marker) Or
@@ -213,7 +231,7 @@ unit aoptbase;
         End;
   End;
 
-  Function TAOptBase.GetLastInstruction(Current: tai; Var Last: tai): Boolean;
+  class function TAOptBase.GetLastInstruction(Current: tai; out Last: tai): Boolean;
   Begin
     Repeat
       Current := Tai(Current.previous);
@@ -255,12 +273,12 @@ unit aoptbase;
   End;
 
 
-  function TAOptBase.SkipEntryExitMarker(current: tai; var next: tai): boolean;
+  class function TAOptBase.SkipEntryExitMarker(current: tai; out next: tai): boolean;
     begin
       result:=true;
+      next:=current;
       if current.typ<>ait_marker then
         exit;
-      next:=current;
       while GetNextInstruction(next,next) do
         begin
           if (next.typ<>ait_marker) or not(tai_marker(next).Kind in [mark_Position,mark_BlockStart]) then
@@ -308,10 +326,16 @@ unit aoptbase;
     end;
 
 
-  function TAOptBase.SuperRegistersEqual(reg1,reg2 : TRegister) : Boolean;
+  class function TAOptBase.SuperRegistersEqual(reg1,reg2 : TRegister) : Boolean;{$ifdef USEINLINE}inline;{$endif}
   Begin
-    Result:=(getregtype(reg1) = getregtype(reg2)) and
-            (getsupreg(reg1) = getsupreg(Reg2));
+    { Do an optimized version of
+
+      Result:=(getregtype(reg1) = getregtype(reg2)) and
+      (getsupreg(reg1) = getsupreg(Reg2));
+
+      as SuperRegistersEqual is used a lot
+    }
+    Result:=(DWord(reg1) and $ff00ffff)=(DWord(reg2) and $ff00ffff);
   end;
 
   { ******************* Processor dependent stuff *************************** }

+ 2 - 2
compiler/aoptda.pas

@@ -29,7 +29,7 @@ Unit aoptda;
 
     uses
       cpubase,cgbase,
-      aasmbase,aasmtai,aasmdata,aasmcpu,
+      aasmtai,aasmdata,aasmcpu,
       aoptcpub, aoptbase;
 
     Type
@@ -56,7 +56,7 @@ Unit aoptda;
   Implementation
 
     uses
-      globals, aoptobj;
+      globals;
 
     Procedure TAOptDFA.DoDFA;
     { Analyzes the Data Flow of an assembler list. Analyses the reg contents     }

File diff suppressed because it is too large
+ 1198 - 140
compiler/aoptobj.pas


+ 52 - 6
compiler/aoptutils.pas

@@ -27,23 +27,69 @@ unit aoptutils;
   interface
 
     uses
-      aasmtai,aasmcpu;
+      cpubase,aasmtai,aasmcpu;
 
     function MatchOpType(const p : taicpu;type0: toptype) : Boolean;
     function MatchOpType(const p : taicpu;type0,type1 : toptype) : Boolean;
+{$if max_operands>2}
+    function MatchOpType(const p : taicpu; type0,type1,type2 : toptype) : Boolean;
+{$endif max_operands>2}
+
+    { skips all labels and returns the next "real" instruction }
+    function SkipLabels(hp: tai; out hp2: tai): boolean;
+
+    { sets hp2 to hp and returns True if hp is not nil }
+    function SetAndTest(const hp: tai; out hp2: tai): Boolean;
 
   implementation
 
-    function MatchOpType(const p : taicpu; type0: toptype) : Boolean;
+    uses
+      aasmbase;
+
+
+    function MatchOpType(const p : taicpu; type0: toptype) : Boolean; inline;
       begin
-        Result:=(p.oper[0]^.typ=type0);
+        Result:=(p.ops=1) and (p.oper[0]^.typ=type0);
       end;
 
 
-    function MatchOpType(const p : taicpu; type0,type1 : toptype) : Boolean;
+    function MatchOpType(const p : taicpu; type0,type1 : toptype) : Boolean; inline;
       begin
-        Result:=(p.oper[0]^.typ=type0) and (p.oper[0]^.typ=type1);
+        Result:=(p.ops=2) and (p.oper[0]^.typ=type0) and (p.oper[1]^.typ=type1);
       end;
 
-end.
 
+{$if max_operands>2}
+    function MatchOpType(const p : taicpu; type0,type1,type2 : toptype) : Boolean; inline;
+      begin
+        Result:=(p.ops=3) and (p.oper[0]^.typ=type0) and (p.oper[1]^.typ=type1) and (p.oper[2]^.typ=type2);
+      end;
+{$endif max_operands>2}
+
+
+    { skips all labels and returns the next "real" instruction }
+    function SkipLabels(hp: tai; out hp2: tai): boolean;
+      begin
+        while assigned(hp.next) and
+              (tai(hp.next).typ in SkipInstr + [ait_label,ait_align]) Do
+          hp := tai(hp.next);
+        if assigned(hp.next) then
+          begin
+            SkipLabels := True;
+            hp2 := tai(hp.next)
+          end
+        else
+          begin
+            hp2 := hp;
+            SkipLabels := False
+          end;
+      end;
+
+    { sets hp2 to hp and returns True if hp is not nil }
+    function SetAndTest(const hp: tai; out hp2: tai): Boolean; inline;
+      begin
+        hp2 := hp;
+        Result := Assigned(hp);
+      end;
+
+end.

+ 248 - 89
compiler/arm/aasmcpu.pas

@@ -76,6 +76,7 @@ uses
       OT_IMMTINY   = $00002100;
       OT_IMMSHIFTER= $00002200;
       OT_IMMEDIATEZERO = $10002200;
+      OT_IMMEDIATEMM     = $00002400;
       OT_IMMEDIATE24 = OT_IMM24;
       OT_SHIFTIMM  = OT_SHIFTEROP or OT_IMMSHIFTER;
       OT_SHIFTIMMEDIATE = OT_SHIFTIMM;
@@ -137,6 +138,10 @@ uses
 
       IF_NONE   = $00000000;
 
+      IF_EXTENSIONS = $0000000F;
+
+      IF_NEON       = $00000001;
+
       IF_ARMMASK    = $000F0000;
       IF_ARM32      = $00010000;
       IF_THUMB      = $00020000;
@@ -197,9 +202,11 @@ uses
          roundingmode : troundingmode;
          procedure loadshifterop(opidx:longint;const so:tshifterop);
          procedure loadregset(opidx:longint; regsetregtype: tregistertype; regsetsubregtype: tsubregister; const s:tcpuregisterset; ausermode: boolean=false);
-         procedure loadconditioncode(opidx:longint;const cond:tasmcond);
+         procedure loadconditioncode(opidx:longint;const acond:tasmcond);
          procedure loadmodeflags(opidx:longint;const flags:tcpumodeflags);
          procedure loadspecialreg(opidx:longint;const areg:tregister; const aflags:tspecialregflags);
+         procedure loadrealconst(opidx:longint;const _value:bestreal);
+
          constructor op_none(op : tasmop);
 
          constructor op_reg(op : tasmop;_op1 : tregister);
@@ -237,6 +244,8 @@ uses
          { *M*LL }
          constructor op_reg_reg_reg_reg(op : tasmop;_op1,_op2,_op3,_op4 : tregister);
 
+         constructor op_reg_realconst(op : tasmop;_op1: tregister;_op2: bestreal);
+
          { this is for Jmp instructions }
          constructor op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
 
@@ -332,6 +341,19 @@ implementation
       end;
 
 
+    procedure taicpu.loadrealconst(opidx:longint;const _value:bestreal);
+      begin
+        allocate_oper(opidx+1);
+        with oper[opidx]^ do
+          begin
+            if typ<>top_realconst then
+              clearop(opidx);
+            val_real:=_value;
+            typ:=top_realconst;
+          end;
+      end;
+
+
     procedure taicpu.loadregset(opidx:longint; regsetregtype: tregistertype; regsetsubregtype: tsubregister; const s:tcpuregisterset; ausermode: boolean);
       var
         i : byte;
@@ -363,19 +385,21 @@ implementation
                    if assigned(add_reg_instruction_hook) and (i in regset^) then
                      add_reg_instruction_hook(self,newreg(R_MMREGISTER,i,regsetsubregtype));
                  end;
+             else
+               internalerror(2019050932);
            end;
          end;
       end;
 
 
-    procedure taicpu.loadconditioncode(opidx:longint;const cond:tasmcond);
+    procedure taicpu.loadconditioncode(opidx:longint;const acond:tasmcond);
       begin
         allocate_oper(opidx+1);
         with oper[opidx]^ do
          begin
            if typ<>top_conditioncode then
              clearop(opidx);
-           cc:=cond;
+           cc:=acond;
            typ:=top_conditioncode;
          end;
       end;
@@ -504,6 +528,15 @@ implementation
       end;
 
 
+    constructor taicpu.op_reg_realconst(op : tasmop; _op1 : tregister; _op2 : bestreal);
+      begin
+         inherited create(op);
+         ops:=2;
+         loadreg(0,_op1);
+         loadrealconst(1,_op2);
+      end;
+
+
      constructor taicpu.op_reg_reg_const(op : tasmop;_op1,_op2 : tregister; _op3: aint);
        begin
          inherited create(op);
@@ -803,7 +836,7 @@ implementation
           end
         else
           case opcode of
-            A_ADC,A_ADD,A_AND,A_BIC,
+            A_ADC,A_ADD,A_AND,A_BIC,A_ORN,
             A_EOR,A_CLZ,A_RBIT,
             A_LDR,A_LDRB,A_LDRBT,A_LDRH,A_LDRSB,
             A_LDRSH,A_LDRT,
@@ -832,6 +865,8 @@ implementation
             A_UXTB,A_UXTH,A_SXTB,A_SXTH,
             A_NEG,
             A_VABS,A_VADD,A_VCVT,A_VDIV,A_VLDR,A_VMOV,A_VMUL,A_VNEG,A_VSQRT,A_VSUB,
+            A_VEOR,
+            A_VMRS,A_VMSR,
             A_MRS,A_MSR:
               if opnr=0 then
                 result:=operand_write
@@ -867,7 +902,9 @@ implementation
                 result := operand_read;
             //Thumb2
             A_LSL, A_LSR, A_ROR, A_ASR, A_SDIV, A_UDIV, A_MOVW, A_MOVT, A_MLS, A_BFI,
-            A_SMMLA,A_SMMLS:
+            A_QADD,
+            A_PKHTB,A_PKHBT,
+            A_SMMLA,A_SMMLS,A_SMUAD,A_SMUSD:
               if opnr in [0] then
                 result:=operand_write
               else
@@ -886,7 +923,10 @@ implementation
             A_STREX:
               result:=operand_write;
             else
-              internalerror(200403151);
+              begin
+                writeln(opcode);
+                internalerror(200403151);
+              end;
           end;
       end;
 
@@ -1114,6 +1154,8 @@ implementation
                                           begin
                                             inc(extradataoffset,multiplier*(((tai_realconst(hp).savesize-4)+3) div 4));
                                           end;
+                                        else
+                                          ;
                                       end;
                                       { check if the same constant has been already inserted into the currently handled list,
                                         if yes, reuse it }
@@ -1123,8 +1165,9 @@ implementation
                                           while assigned(hp2) do
                                             begin
                                               if (hp2.typ=ait_const) and (tai_const(hp2).sym=tai_const(hp).sym)
-                                                and (tai_const(hp2).value=tai_const(hp).value) and (tai(hp2.previous).typ=ait_label)
-                                              then
+                                                and (tai_const(hp2).value=tai_const(hp).value) and (tai(hp2.previous).typ=ait_label) and
+                                                { gottpoff and tlsgd symbols are PC relative, so we cannot reuse them }
+                                                (not(tai_const(hp2).consttype in [aitconst_gottpoff,aitconst_tlsgd,aitconst_tlsdesc])) then
                                                 begin
                                                   with taicpu(curtai).oper[curop]^.ref^ do
                                                     begin
@@ -1172,6 +1215,8 @@ implementation
                 begin
                   inc(curinspos,multiplier*((tai_realconst(hp).savesize+3) div 4));
                 end;
+              else
+                ;
             end;
             { special case for case jump tables }
             penalty:=0;
@@ -1189,9 +1234,9 @@ implementation
                       begin
                         penalty:=multiplier;
                         hp:=tai(hp.next);
-                        { skip register allocations and comments inserted by the optimizer as well as a label
+                        { skip register allocations and comments inserted by the optimizer as well as a label and align
                           as jump tables for thumb might have }
-                        while assigned(hp) and (hp.typ in [ait_comment,ait_regalloc,ait_label]) do
+                        while assigned(hp) and (hp.typ in [ait_comment,ait_regalloc,ait_label,ait_align]) do
                           hp:=tai(hp.next);
                         while assigned(hp) and (hp.typ=ait_const) do
                           begin
@@ -1242,6 +1287,8 @@ implementation
                           or if we splitted them so split before }
                       CheckLimit(hp,4);
                     end;
+                  else
+                    ;
                 end;
               end;
 
@@ -1396,8 +1443,11 @@ implementation
                               end;
                           end;
                       end;
+                    else;
                   end;
                 end;
+              else
+                ;
             end;
 
             curtai:=tai(curtai.Next);
@@ -1461,8 +1511,12 @@ implementation
                             taicpu(curtai).ops:=2;
                           end;
                       end;
+                    else
+                      ;
                   end;
                 end;
+              else
+                ;
             end;
 
             curtai:=tai(curtai.Next);
@@ -1508,55 +1562,59 @@ implementation
           begin
             case curtai.typ of
               ait_instruction:
-                if IsIT(taicpu(curtai).opcode) then
-                  begin
-                    levels := GetITLevels(taicpu(curtai).opcode);
-                    if levels < 4 then
-                      begin
-                        i:=levels;
-                        hp1:=tai(curtai.Next);
-                        while assigned(hp1) and
-                          (i > 0) do
-                          begin
-                            if hp1.typ=ait_instruction then
-                              begin
-                                dec(i);
-                                if (i = 0) and
-                                  mustbelast(hp1) then
-                                  begin
-                                    hp1:=nil;
-                                    break;
-                                  end;
-                              end;
-                            hp1:=tai(hp1.Next);
-                          end;
+                begin
+                  if IsIT(taicpu(curtai).opcode) then
+                    begin
+                      levels := GetITLevels(taicpu(curtai).opcode);
+                      if levels < 4 then
+                        begin
+                          i:=levels;
+                          hp1:=tai(curtai.Next);
+                          while assigned(hp1) and
+                            (i > 0) do
+                            begin
+                              if hp1.typ=ait_instruction then
+                                begin
+                                  dec(i);
+                                  if (i = 0) and
+                                    mustbelast(hp1) then
+                                    begin
+                                      hp1:=nil;
+                                      break;
+                                    end;
+                                end;
+                              hp1:=tai(hp1.Next);
+                            end;
 
-                        if assigned(hp1) then
-                          begin
-                            // We are pointing at the first instruction after the IT block
-                            while assigned(hp1) and
-                              (hp1.typ<>ait_instruction) do
-                                hp1:=tai(hp1.Next);
-
-                            if assigned(hp1) and
-                              (hp1.typ=ait_instruction) and
-                              IsIT(taicpu(hp1).opcode) then
-                              begin
-                                if (levels+GetITLevels(taicpu(hp1).opcode) <= 4) and
-                                  ((taicpu(curtai).oper[0]^.cc=taicpu(hp1).oper[0]^.cc) or
-                                   (taicpu(curtai).oper[0]^.cc=inverse_cond(taicpu(hp1).oper[0]^.cc))) then
-                                  begin
-                                    taicpu(curtai).opcode:=getMergedInstruction(taicpu(curtai).opcode,
-                                                                                taicpu(hp1).opcode,
-                                                                                taicpu(curtai).oper[0]^.cc=inverse_cond(taicpu(hp1).oper[0]^.cc));
+                          if assigned(hp1) then
+                            begin
+                              // We are pointing at the first instruction after the IT block
+                              while assigned(hp1) and
+                                (hp1.typ<>ait_instruction) do
+                                  hp1:=tai(hp1.Next);
+
+                              if assigned(hp1) and
+                                (hp1.typ=ait_instruction) and
+                                IsIT(taicpu(hp1).opcode) then
+                                begin
+                                  if (levels+GetITLevels(taicpu(hp1).opcode) <= 4) and
+                                    ((taicpu(curtai).oper[0]^.cc=taicpu(hp1).oper[0]^.cc) or
+                                     (taicpu(curtai).oper[0]^.cc=inverse_cond(taicpu(hp1).oper[0]^.cc))) then
+                                    begin
+                                      taicpu(curtai).opcode:=getMergedInstruction(taicpu(curtai).opcode,
+                                                                                  taicpu(hp1).opcode,
+                                                                                  taicpu(curtai).oper[0]^.cc=inverse_cond(taicpu(hp1).oper[0]^.cc));
 
-                                    list.Remove(hp1);
-                                    hp1.Free;
-                                  end;
-                              end;
-                          end;
-                      end;
-                  end;
+                                      list.Remove(hp1);
+                                      hp1.Free;
+                                    end;
+                                end;
+                            end;
+                        end;
+                    end;
+                end
+              else
+                ;
             end;
 
             curtai:=tai(curtai.Next);
@@ -1583,6 +1641,8 @@ implementation
                       case taicpu(curtai).opcode of
                         A_AND: taicpu(curtai).opcode:=A_BIC;
                         A_BIC: taicpu(curtai).opcode:=A_AND;
+                        else
+                          internalerror(2019050931);
                       end;
                       taicpu(curtai).oper[2]^.val:=(not taicpu(curtai).oper[2]^.val) and $FFFFFFFF;
                     end
@@ -1595,10 +1655,14 @@ implementation
                       case taicpu(curtai).opcode of
                         A_ADD: taicpu(curtai).opcode:=A_SUB;
                         A_SUB: taicpu(curtai).opcode:=A_ADD;
+                        else
+                          internalerror(2019050930);
                       end;
                       taicpu(curtai).oper[2]^.val:=-taicpu(curtai).oper[2]^.val;
                     end;
                 end;
+              else
+                ;
             end;
 
             curtai:=tai(curtai.Next);
@@ -1646,6 +1710,8 @@ implementation
                       end;
                   end;
                 end;
+              else
+                ;
             end;
 
             curtai:=tai(curtai.Next);
@@ -1671,6 +1737,7 @@ implementation
                            (taicpu(curtai).oper[2]^.typ=top_shifterop) then
                           begin
                             case taicpu(curtai).oper[2]^.shifterop^.shiftmode of
+                              SM_NONE: ;
                               SM_LSL: taicpu(curtai).opcode:=A_LSL;
                               SM_LSR: taicpu(curtai).opcode:=A_LSR;
                               SM_ASR: taicpu(curtai).opcode:=A_ASR;
@@ -1707,8 +1774,12 @@ implementation
                       begin
                         taicpu(curtai).opcode:=A_SVC;
                       end;
+                    else
+                      ;
                   end;
                 end;
+              else
+                ;
             end;
 
             curtai:=tai(curtai.Next);
@@ -2151,17 +2222,19 @@ implementation
 
         FPUMasks: array[tfputype] of longword =
           (
-            IF_NONE,
-            IF_NONE,
-            IF_NONE,
-            IF_FPA,
-            IF_FPA,
-            IF_FPA,
-            IF_VFPv2,
-            IF_VFPv2 or IF_VFPv3,
-            IF_VFPv2 or IF_VFPv3,
-            IF_NONE,
-            IF_VFPv2 or IF_VFPv3 or IF_VFPv4
+            { fpu_none       } IF_NONE,
+            { fpu_soft       } IF_NONE,
+            { fpu_libgcc     } IF_NONE,
+            { fpu_fpa        } IF_FPA,
+            { fpu_fpa10      } IF_FPA,
+            { fpu_fpa11      } IF_FPA,
+            { fpu_vfpv2      } IF_VFPv2,
+            { fpu_vfpv3      } IF_VFPv2 or IF_VFPv3,
+            { fpu_neon_vfpv3 } IF_VFPv2 or IF_VFPv3 or IF_NEON,
+            { fpu_vfpv3_d16  } IF_VFPv2 or IF_VFPv3,
+            { fpu_fpv4_s16   } IF_NONE,
+            { fpu_vfpv4      } IF_VFPv2 or IF_VFPv3 or IF_VFPv4,
+            { fpu_neon_vfpv4 } IF_VFPv2 or IF_VFPv3 or IF_VFPv4 or IF_NEON
           );
       begin
         fArmVMask:=Masks[current_settings.cputype] or FPUMasks[current_settings.fputype];
@@ -2363,6 +2436,10 @@ implementation
                 begin
                   ot:=OT_MODEFLAGS;
                 end;
+              top_realconst:
+                begin
+                  ot:=OT_IMMEDIATEMM;
+                end;
               else
                 internalerror(2004022623);
             end;
@@ -2719,6 +2796,8 @@ implementation
         refoper : poper;
         msb : longint;
         r: byte;
+        singlerec : tcompsinglerec;
+        doublerec : tcompdoublerec;
 
       procedure setshifterop(op : byte);
         var
@@ -2937,6 +3016,7 @@ implementation
           shift:=0;
           typ:=0;
           case oper[op]^.shifterop^.shiftmode of
+            SM_None: ;
             SM_LSL: begin typ:=0; shift:=oper[op]^.shifterop^.shiftimm; end;
             SM_LSR: begin typ:=1; shift:=oper[op]^.shifterop^.shiftimm; if shift=32 then shift:=0; end;
             SM_ASR: begin typ:=2; shift:=oper[op]^.shifterop^.shiftimm; if shift=32 then shift:=0; end;
@@ -2977,13 +3057,23 @@ implementation
                 begin
                   currsym:=objdata.symbolref(oper[0]^.ref^.symbol);
 
-                  bytes:=bytes or (((oper[0]^.ref^.offset-8) shr 2) and $ffffff);
+                  { tlscall is not relative so ignore the offset }
+                  if oper[0]^.ref^.refaddr<>addr_tlscall then
+                    bytes:=bytes or (((oper[0]^.ref^.offset-8) shr 2) and $ffffff);
 
                   if (opcode<>A_BL) or (condition<>C_None) then
                     objdata.writereloc(aint(bytes),4,currsym,RELOC_RELATIVE_24)
                   else
-                    objdata.writereloc(aint(bytes),4,currsym,RELOC_RELATIVE_CALL);
-
+                    case oper[0]^.ref^.refaddr of
+                      addr_pic:
+                        objdata.writereloc(aint(bytes),4,currsym,RELOC_ARM_CALL);
+                      addr_full:
+                        objdata.writereloc(aint(bytes),4,currsym,RELOC_RELATIVE_CALL);
+                      addr_tlscall:
+                        objdata.writereloc(aint(bytes),4,currsym,RELOC_TLS_CALL);
+                      else
+                        Internalerror(2019092903);
+                    end;
                   exit;
                 end;
             end;
@@ -3881,36 +3971,76 @@ implementation
                   end;
                 PF_F32:
                   begin
-                    if (getregtype(oper[0]^.reg)<>R_MMREGISTER) or
-                       (getregtype(oper[1]^.reg)<>R_MMREGISTER) then
+                    if (getregtype(oper[0]^.reg)<>R_MMREGISTER) then
                       Message(asmw_e_invalid_opcode_and_operands);
 
+                    case oper[1]^.typ of
+                      top_realconst:
+                        begin
+                          if not(IsVFPFloatImmediate(s32real,oper[1]^.val_real)) then
+                            Message(asmw_e_invalid_opcode_and_operands);
+                          singlerec.value:=oper[1]^.val_real;
+                          singlerec:=tcompsinglerec(NtoLE(DWord(singlerec)));
+
+                          bytes:=bytes or ((singlerec.bytes[2] shr 3) and $f);
+                          bytes:=bytes or (DWord((singlerec.bytes[2] shr 7) and $1) shl 16) or (DWord(singlerec.bytes[3] and $3) shl 17) or (DWord((singlerec.bytes[3] shr 7) and $1) shl 19);
+                        end;
+                      top_reg:
+                        begin
+                          if getregtype(oper[1]^.reg)<>R_MMREGISTER then
+                            Message(asmw_e_invalid_opcode_and_operands);
+                          Rm:=getmmreg(oper[1]^.reg);
+                          bytes:=bytes or (((Rm and $1E) shr 1) shl 0);
+                          bytes:=bytes or ((Rm and $1) shl 5);
+                        end;
+                      else
+                        Message(asmw_e_invalid_opcode_and_operands);
+                    end;
                     Rd:=getmmreg(oper[0]^.reg);
-                    Rm:=getmmreg(oper[1]^.reg);
 
                     bytes:=bytes or (((Rd and $1E) shr 1) shl 12);
                     bytes:=bytes or ((Rd and $1) shl 22);
 
-                    bytes:=bytes or (((Rm and $1E) shr 1) shl 0);
-                    bytes:=bytes or ((Rm and $1) shl 5);
                   end;
                 PF_F64:
                   begin
-                    if (getregtype(oper[0]^.reg)<>R_MMREGISTER) or
-                       (getregtype(oper[1]^.reg)<>R_MMREGISTER) then
+                    if (getregtype(oper[0]^.reg)<>R_MMREGISTER) then
                       Message(asmw_e_invalid_opcode_and_operands);
 
+                    case oper[1]^.typ of
+                      top_realconst:
+                        begin
+                          if not(IsVFPFloatImmediate(s64real,oper[1]^.val_real)) then
+                            Message(asmw_e_invalid_opcode_and_operands);
+                          doublerec.value:=oper[1]^.val_real;
+                          doublerec:=tcompdoublerec(NtoLE(QWord(doublerec)));
+
+                          //      32c:       eeb41b00        vmov.f64        d1, #64 ; 0x40
+
+                          // 32c:       eeb61b00        vmov.f64        d1, #96 ; 0x60
+                          bytes:=bytes or (doublerec.bytes[6] and $f);
+                          bytes:=bytes or (DWord((doublerec.bytes[6] shr 4) and $7) shl 16) or (DWord((doublerec.bytes[7] shr 7) and $1) shl 19);
+                        end;
+                      top_reg:
+                        begin
+                          if getregtype(oper[1]^.reg)<>R_MMREGISTER then
+                            Message(asmw_e_invalid_opcode_and_operands);
+                          Rm:=getmmreg(oper[1]^.reg);
+                          bytes:=bytes or (Rm and $F);
+                          bytes:=bytes or ((Rm and $10) shl 1);
+                        end;
+                      else
+                        Message(asmw_e_invalid_opcode_and_operands);
+                    end;
                     Rd:=getmmreg(oper[0]^.reg);
-                    Rm:=getmmreg(oper[1]^.reg);
 
                     bytes:=bytes or (1 shl 8);
 
                     bytes:=bytes or ((Rd and $F) shl 12);
                     bytes:=bytes or (((Rd and $10) shr 4) shl 22);
-
-                    bytes:=bytes or (Rm and $F);
-                    bytes:=bytes or ((Rm and $10) shl 1);
                   end;
+                else
+                  Message(asmw_e_invalid_opcode_and_operands);
               end;
             end;
           #$41,#$91: // VMRS/VMSR
@@ -4071,6 +4201,8 @@ implementation
                         d:=(rd shr 4) and 1;
                         rd:=rd and $F;
                       end;
+                    else
+                      internalerror(2019050929);
                   end;
 
                   m:=0;
@@ -4091,6 +4223,8 @@ implementation
                         m:=(rm shr 4) and 1;
                         rm:=rm and $F;
                       end;
+                    else
+                      internalerror(2019050928);
                   end;
 
                   bytes:=bytes or (Rd shl 12);
@@ -4107,6 +4241,8 @@ implementation
                     PF_F64S32,
                     PF_F64U32:
                       bytes:=bytes or (1 shl 8);
+                    else
+                      ;
                   end;
 
                   if oppostfix in [PF_S32F32,PF_S32F64,PF_U32F32,PF_U32F64] then
@@ -4115,6 +4251,8 @@ implementation
                         PF_S32F64,
                         PF_S32F32:
                           bytes:=bytes or (1 shl 16);
+                        else
+                          ;
                       end;
 
                       bytes:=bytes or (1 shl 18);
@@ -4185,9 +4323,9 @@ implementation
 
                         rn:=16;
                       end;
-                  else
-                    Rn:=0;
-                    message(asmw_e_invalid_opcode_and_operands);
+                    else
+                      Rn:=0;
+                      message(asmw_e_invalid_opcode_and_operands);
                   end;
 
                   case oppostfix of
@@ -4199,10 +4337,10 @@ implementation
                         bytes:=bytes or (1 shl 8);
                         D:=(rd shr 4) and $1; Rd:=Rd and $F;
                       end;
-                  else
-                    begin
-                      D:=rd and $1; Rd:=Rd shr 1;
-                    end;
+                    else
+                      begin
+                        D:=rd and $1; Rd:=Rd shr 1;
+                      end;
                   end;
 
                   case oppostfix of
@@ -4211,6 +4349,8 @@ implementation
                     PF_F64U16,PF_F32U16,
                     PF_F32U32,PF_F64U32:
                       bytes:=bytes or (1 shl 16);
+                    else
+                      ;
                   end;
 
                   if oppostfix in [PF_S32F32,PF_S32F64,PF_U32F32,PF_U32F64,PF_S16F32,PF_S16F64,PF_U16F32,PF_U16F64] then
@@ -4263,6 +4403,8 @@ implementation
                       bytes:=bytes or (1 shl 23);
                     PF_DB,PF_DBS,PF_DBD,PF_DBX:
                       bytes:=bytes or (2 shl 23);
+                    else
+                      ;
                   end;
 
                   case oppostfix of
@@ -4271,6 +4413,8 @@ implementation
                         bytes:=bytes or (1 shl 8);
                         bytes:=bytes or (1 shl 0); // Offset is odd
                       end;
+                    else
+                      ;
                   end;
 
                   dp_operation:=(oper[1]^.subreg=R_SUBFD);
@@ -4562,6 +4706,8 @@ implementation
                         bytes:=bytes or ((oper[2]^.val shr 2) and $7F);
                       end;
                   end;
+                else
+                  internalerror(2019050926);
               end;
             end;
           #$65: { Thumb load/store }
@@ -4698,6 +4844,8 @@ implementation
                     else
                       bytes:=bytes or (getsupreg(oper[0]^.reg) shl 8);
                   end;
+                else
+                  internalerror(2019050925);
               end;
             end;
           #$6A: { Thumb: IT }
@@ -5303,6 +5451,8 @@ implementation
               case oppostfix of
                 PF_None,PF_IA,PF_FD: bytes:=bytes or ($1 shl 23);
                 PF_DB,PF_EA: bytes:=bytes or ($2 shl 23);
+              else
+                message1(asmw_e_invalid_opcode_and_operands, '"Invalid Postfix"');
               end;
             end;
           #$8D: { Thumb-2: BL/BLX }
@@ -5450,9 +5600,13 @@ implementation
                     bytes:=bytes or (1 shl 24);
 
                   case oppostfix of
+                    PF_S: bytes:=bytes or (0 shl 22) or (0 shl 15);
                     PF_D: bytes:=bytes or (0 shl 22) or (1 shl 15);
                     PF_E: bytes:=bytes or (1 shl 22) or (0 shl 15);
                     PF_P: bytes:=bytes or (1 shl 22) or (1 shl 15);
+                    PF_EP: ;
+                    else
+                      message1(asmw_e_invalid_opcode_and_operands, '"Invalid postfix"');
                   end;
                 end
               else
@@ -5527,6 +5681,7 @@ implementation
                 end;
 
               case roundingmode of
+                RM_NONE: ;
                 RM_P: bytes:=bytes or (1 shl 5);
                 RM_M: bytes:=bytes or (2 shl 5);
                 RM_Z: bytes:=bytes or (3 shl 5);
@@ -5554,6 +5709,7 @@ implementation
                     bytes:=bytes or (getsupreg(oper[1]^.reg) shl 12);
 
                     case roundingmode of
+                      RM_NONE: ;
                       RM_P: bytes:=bytes or (1 shl 5);
                       RM_M: bytes:=bytes or (2 shl 5);
                       RM_Z: bytes:=bytes or (3 shl 5);
@@ -5573,6 +5729,7 @@ implementation
                     bytes:=bytes or (getsupreg(oper[1]^.reg) shl 0);
 
                     case roundingmode of
+                      RM_NONE: ;
                       RM_P: bytes:=bytes or (1 shl 5);
                       RM_M: bytes:=bytes or (2 shl 5);
                       RM_Z: bytes:=bytes or (3 shl 5);
@@ -5602,6 +5759,8 @@ implementation
                         Message(asmw_e_invalid_opcode_and_operands);
                       end;
                   end;
+                else
+                  Message1(asmw_e_invalid_opcode_and_operands, '"Unsupported opcode"');
               end;
             end;
           #$fe: // No written data

+ 53 - 15
compiler/arm/agarmgas.pas

@@ -49,6 +49,7 @@ unit agarmgas;
 
       TArmAppleGNUAssembler=class(TAppleGNUassembler)
         constructor CreateWithWriter(info: pasminfo; wr: TExternalAssemblerOutputFile; freewriter, smart: boolean); override;
+        function MakeCmdLine: TCmdStr; override;
         procedure WriteExtraHeader; override;
       end;
 
@@ -94,7 +95,9 @@ unit agarmgas;
       begin
         inherited;
         InstrWriter := TArmInstrWriter.create(self);
+{$ifndef llvm}
         if GenerateThumb2Code then
+{$endif}
           TArmInstrWriter(InstrWriter).unified_syntax:=true;
       end;
 
@@ -102,18 +105,26 @@ unit agarmgas;
     function TArmGNUAssembler.MakeCmdLine: TCmdStr;
       begin
         result:=inherited MakeCmdLine;
-        if (current_settings.fputype = fpu_soft) then
-          result:='-mfpu=softvfp '+result;
-        if (current_settings.fputype = fpu_vfpv2) then
-          result:='-mfpu=vfpv2 '+result;
-        if (current_settings.fputype = fpu_vfpv3) then
-          result:='-mfpu=vfpv3 '+result;
-        if (current_settings.fputype = fpu_vfpv3_d16) then
-          result:='-mfpu=vfpv3-d16 '+result;
-        if (current_settings.fputype = fpu_fpv4_s16) then
-          result:='-mfpu=fpv4-sp-d16 '+result;
-        if (current_settings.fputype = fpu_vfpv4) then
-          result:='-mfpu=vfpv4 '+result;
+        case current_settings.fputype of
+          fpu_soft:
+            result:='-mfpu=softvfp '+result;
+          fpu_vfpv2:
+            result:='-mfpu=vfpv2 '+result;
+          fpu_vfpv3:
+            result:='-mfpu=vfpv3 '+result;
+          fpu_neon_vfpv3:
+            result:='-mfpu=neon-vfpv3 '+result;
+          fpu_vfpv3_d16:
+            result:='-mfpu=vfpv3-d16 '+result;
+          fpu_fpv4_s16:
+            result:='-mfpu=fpv4-sp-d16 '+result;
+          fpu_vfpv4:
+            result:='-mfpu=vfpv4 '+result;
+          fpu_neon_vfpv4:
+            result:='-mfpu=neon-vfpv4 '+result;
+          else
+            ;
+        end;
 
         if GenerateThumb2Code then
           result:='-march='+cputype_to_gas_march[current_settings.cputype]+' -mthumb -mthumb-interwork '+result
@@ -124,7 +135,11 @@ unit agarmgas;
 
         if target_info.abi = abi_eabihf then
           { options based on what gcc uses on debian armhf }
-          result:='-mfloat-abi=hard -meabi=5 '+result;
+          result:='-mfloat-abi=hard -meabi=5 '+result
+        else if (target_info.abi = abi_eabi) and not(current_settings.fputype = fpu_soft) then
+          result:='-mfloat-abi=softfp -meabi=5 '+result
+        else if (target_info.abi = abi_eabi) and (current_settings.fputype = fpu_soft) then
+          result:='-mfloat-abi=soft -meabi=5 '+result;
       end;
 
     procedure TArmGNUAssembler.WriteExtraHeader;
@@ -146,6 +161,18 @@ unit agarmgas;
       end;
 
 
+    function TArmAppleGNUAssembler.MakeCmdLine: TCmdStr;
+      begin
+        result:=inherited MakeCmdLine;
+	if (asminfo^.id = as_clang) then
+          begin
+            if fputypestrllvm[current_settings.fputype] <> '' then
+              result:='-m'+fputypestrllvm[current_settings.fputype]+' '+result;
+            { Apple arm always uses softfp floating point ABI }
+            result:='-mfloat-abi=softfp '+result;
+          end;
+      end;
+
     procedure TArmAppleGNUAssembler.WriteExtraHeader;
       begin
         inherited WriteExtraHeader;
@@ -180,7 +207,9 @@ unit agarmgas;
                 if offset<>0 then
                   s:=s+tostr_with_plus(offset);
                 if refaddr=addr_pic then
-                  s:=s+'(PLT)';
+                  s:=s+'(PLT)'
+                else if refaddr=addr_tlscall then
+                  s:=s+'(tlscall)';
               end
             else
               begin
@@ -201,6 +230,8 @@ unit agarmgas;
                        s:=s+', rrx'
                      else if shiftmode <> SM_None then
                        s:=s+', '+gas_shiftmode2str[shiftmode]+' #'+tostr(shiftimm);
+                     if offset<>0 then
+                       Internalerror(2019012601);
                   end
                 else if offset<>0 then
                   s:=s+', #'+tostr(offset);
@@ -210,6 +241,8 @@ unit agarmgas;
                     s:=s+']';
                   AM_PREINDEXED:
                     s:=s+']!';
+                  else
+                    ;
                 end;
               end;
 
@@ -318,6 +351,11 @@ unit agarmgas;
                   if srF in o.specialflags then getopstr:=getopstr+'f';
                   if srS in o.specialflags then getopstr:=getopstr+'s';
                 end;
+            end;
+          top_realconst:
+            begin
+              str(o.val_real,Result);
+              Result:='#'+Result;
             end
           else
             internalerror(2002070604);
@@ -402,7 +440,7 @@ unit agarmgas;
             idtxt  : 'AS';
             asmbin : 'as';
             asmcmd : '-o $OBJ $EXTRAOPT $ASM';
-            supported_targets : [system_arm_linux,system_arm_wince,system_arm_gba,system_arm_palmos,system_arm_nds,
+            supported_targets : [system_arm_linux,system_arm_netbsd,system_arm_wince,system_arm_gba,system_arm_palmos,system_arm_nds,
                                  system_arm_embedded,system_arm_symbian,system_arm_android,system_arm_aros];
             flags : [af_needar,af_smartlink_sections];
             labelprefix : '.L';

+ 233 - 41
compiler/arm/aoptcpu.pas

@@ -25,7 +25,7 @@ Unit aoptcpu;
 
 {$i fpcdefs.inc}
 
-{$define DEBUG_PREREGSCHEDULER}
+{ $define DEBUG_PREREGSCHEDULER}
 {$define DEBUG_AOPTCPU}
 
 Interface
@@ -34,6 +34,9 @@ uses cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
 
 Type
   TCpuAsmOptimizer = class(TAsmOptimizer)
+    { Can't be done in some cases due to the limited range of jumps }
+    function CanDoJumpOpts: Boolean; override;
+
     { uses the same constructor as TAopObj }
     function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
     procedure PeepHoleOptPass2;override;
@@ -83,6 +86,10 @@ Implementation
     cgobj,procinfo,
     aasmbase,aasmdata;
 
+{ Range check must be disabled explicitly as conversions between signed and unsigned
+  32-bit values are done without explicit typecasts }
+{$R-}
+
   function CanBeCond(p : tai) : boolean;
     begin
       result:=
@@ -113,7 +120,9 @@ Implementation
         (r1.signindex = r2.signindex) and
         (r1.shiftimm = r2.shiftimm) and
         (r1.addressmode = r2.addressmode) and
-        (r1.shiftmode = r2.shiftmode);
+        (r1.shiftmode = r2.shiftmode) and
+        (r1.volatility=[]) and
+        (r2.volatility=[]);
     end;
 
   function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
@@ -218,7 +227,7 @@ Implementation
       if (p.opcode in [A_STR, A_LDM, A_STM, A_PLD,
                           A_CMP, A_CMN, A_TST, A_TEQ,
                           A_B, A_BL, A_BX, A_BLX,
-                          A_SMLAL, A_UMLAL]) then i:=0;
+                          A_SMLAL, A_UMLAL, A_VSTM, A_VLDM]) then i:=0;
 
       while(i<p.ops) do
         begin
@@ -235,6 +244,8 @@ Implementation
               instructionLoadsFromReg :=
                 (p.oper[I]^.ref^.base = reg) or
                 (p.oper[I]^.ref^.index = reg);
+            else
+              ;
           end;
           if instructionLoadsFromReg then exit; {Bailout if we found something}
           Inc(I);
@@ -294,6 +305,8 @@ Implementation
         A_POP:
           Result := (getsupreg(reg) in p.oper[0]^.regset^) or
                                    (reg=NR_STACK_POINTER_REG);
+        else
+          ;
       end;
 
       if Result then
@@ -310,6 +323,8 @@ Implementation
           Result :=
             (taicpu(p).oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
             (taicpu(p).oper[0]^.ref^.base = reg);
+        else
+          ;
       end;
     end;
 
@@ -367,6 +382,16 @@ Implementation
     end;
 {$endif DEBUG_AOPTCPU}
 
+
+  function TCpuAsmOptimizer.CanDoJumpOpts: Boolean;
+    begin
+      { Cannot perform these jump optimisations if the ARM architecture has 16-bit thumb codes }
+      Result := not (
+        (current_settings.instructionset = is_thumb) and not (CPUARM_HAS_THUMB2 in cpu_capabilities[current_settings.cputype])
+      );
+    end;
+
+
   function TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
     var
       alloc,
@@ -433,6 +458,19 @@ Implementation
 
               { finally get rid of the mov }
               taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
+              { Remove preindexing and postindexing for LDR in some cases.
+                For example:
+                  ldr	reg2,[reg1, xxx]!
+                  mov reg1,reg2
+                must be translated to:
+                  ldr	reg1,[reg1, xxx]
+
+                Preindexing must be removed there, since the same register is used as the base and as the target.
+                Such case is not allowed for ARM CPU and produces crash. }
+              if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref)
+                and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base)
+              then
+                taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;
               asml.remove(movp);
               movp.free;
             end;
@@ -447,9 +485,11 @@ Implementation
       hp1 : tai;
     begin
       Result:=false;
-      if (MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) or
-          ((taicpu(p).oppostfix in [PF_F64F32,PF_F64S16,PF_F64S32,PF_F64U16,PF_F64U32]) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F64])) or
-          ((taicpu(p).oppostfix in [PF_F32F64,PF_F32S16,PF_F32S32,PF_F32U16,PF_F32U32]) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F32]))
+      if ((MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
+           ((getregtype(taicpu(movp).oper[0]^.reg)=R_MMREGISTER) or (taicpu(p).opcode=A_VLDR))
+          ) or
+          (((taicpu(p).oppostfix in [PF_F64F32,PF_F64S16,PF_F64S32,PF_F64U16,PF_F64U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFD)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F64])) or
+          (((taicpu(p).oppostfix in [PF_F32F64,PF_F32S16,PF_F32S32,PF_F32U16,PF_F32U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFS)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F32]))
          ) and
          (taicpu(movp).ops=2) and
          MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
@@ -494,6 +534,17 @@ Implementation
                   IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
                 end;
 
+              { change
+                  vldr reg0,[reg1]
+                  vmov reg2,reg0
+                into
+                  ldr reg2,[reg1]
+
+                if reg2 is an int register
+              }
+              if (taicpu(p).opcode=A_VLDR) and (getregtype(taicpu(movp).oper[0]^.reg)=R_INTREGISTER) then
+                taicpu(p).opcode:=A_LDR;
+
               { finally get rid of the mov }
               taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
               asml.remove(movp);
@@ -627,7 +678,6 @@ Implementation
     var
       hp1,hp2,hp3,hp4: tai;
       i, i2: longint;
-      TmpUsedRegs: TAllUsedRegs;
       tempop: tasmop;
       oldreg: tregister;
       dealloc: tai_regalloc;
@@ -654,6 +704,9 @@ Implementation
             if MatchInstruction(p, [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND,
                                  A_RSB,A_RSC,A_SBC,A_MLA], [C_None], [PF_None]) and
               GetNextInstruction(p, hp1) and
+              { mlas is only allowed in arm mode }
+              ((taicpu(p).opcode<>A_MLA) or
+               (current_settings.instructionset<>is_thumb)) and
               MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
               (taicpu(hp1).oper[1]^.typ = top_const) and
               (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
@@ -910,7 +963,7 @@ Implementation
                           MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
                           MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
                           begin
-                            CopyUsedRegs(TmpUsedRegs);
+                            TransferUsedRegs(TmpUsedRegs);
                             UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                             UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                             if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
@@ -924,7 +977,6 @@ Implementation
                                 p:=hp2;
                                 Result:=true;
                               end;
-                            ReleaseUsedRegs(TmpUsedRegs);
                           end
                         { fold
                           mov reg1,reg0, shift imm1
@@ -1188,9 +1240,52 @@ Implementation
                       ....
                     }
                     if (taicpu(p).ops = 2) and
-                       GetNextInstruction(p,hp1) and
+                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                        (tai(hp1).typ = ait_instruction) then
                       begin
+                        {
+                          This removes the mul from
+                          mov rX,0
+                          ...
+                          mul ...,rX,...
+                        }
+                        if false and (taicpu(p).oper[1]^.typ = top_const) and
+                          (taicpu(p).oper[1]^.val=0) and
+                          MatchInstruction(hp1, [A_MUL,A_MLA], [taicpu(p).condition], [taicpu(p).oppostfix]) and
+                          (((taicpu(hp1).oper[1]^.typ=top_reg) and MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^)) or
+                           ((taicpu(hp1).oper[2]^.typ=top_reg) and MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[2]^))) then
+                            begin
+                              TransferUsedRegs(TmpUsedRegs);
+                              UpdateUsedRegs(TmpUsedRegs, tai(p.next));
+                              UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
+                              DebugMsg('Peephole MovMUL/MLA2Mov0 done', p);
+                              if taicpu(hp1).opcode=A_MUL then
+                                taicpu(hp1).loadconst(1,0)
+                              else
+                                taicpu(hp1).loadreg(1,taicpu(hp1).oper[3]^.reg);
+                              taicpu(hp1).ops:=2;
+                              taicpu(hp1).opcode:=A_MOV;
+                              if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
+                                RemoveCurrentP(p);
+                              Result:=true;
+                              exit;
+                            end
+                        else if (taicpu(p).oper[1]^.typ = top_const) and
+                          (taicpu(p).oper[1]^.val=0) and
+                          MatchInstruction(hp1, A_MLA, [taicpu(p).condition], [taicpu(p).oppostfix]) and
+                          MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[3]^) then
+                            begin
+                              TransferUsedRegs(TmpUsedRegs);
+                              UpdateUsedRegs(TmpUsedRegs, tai(p.next));
+                              UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
+                              DebugMsg('Peephole MovMLA2MUL 1 done', p);
+                              taicpu(hp1).ops:=3;
+                              taicpu(hp1).opcode:=A_MUL;
+                              if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
+                                RemoveCurrentP(p);
+                              Result:=true;
+                              exit;
+                            end
                         {
                           This changes the very common
                           mov r0, #0
@@ -1200,7 +1295,7 @@ Implementation
 
                           and removes all superfluous mov instructions
                         }
-                        if (taicpu(p).oper[1]^.typ = top_const) and
+                        else if (taicpu(p).oper[1]^.typ = top_const) and
                            (taicpu(hp1).opcode=A_STR) then
                           while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and
                                 MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
@@ -1328,7 +1423,7 @@ Implementation
                         if taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
                           taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
 
-                        dealloc:=FindRegDeAlloc(taicpu(p).oper[1]^.reg, taicpu(p.Next));
+                        dealloc:=FindRegDeAlloc(taicpu(p).oper[1]^.reg, tai(p.Next));
                         if Assigned(dealloc) then
                           begin
                             asml.remove(dealloc);
@@ -1434,6 +1529,9 @@ Implementation
                                   hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
                                        taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
                                        taicpu(p).oper[2]^.shifterop^);
+                              if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then
+                                AllocRegBetween(taicpu(p).oper[2]^.shifterop^.rs,p,hp1,UsedRegs);
+                              AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
                               asml.insertbefore(hp2, hp1);
                               GetNextInstruction(p, hp2);
                               asml.remove(p);
@@ -1544,7 +1642,14 @@ Implementation
                 A_ORR,
                 A_MLA,
                 A_MLS,
-                A_MUL:
+                A_MUL,
+                A_QADD,A_QADD16,A_QADD8,
+                A_QSUB,A_QSUB16,A_QSUB8,
+                A_QDADD,A_QDSUB,A_QASX,A_QSAX,
+                A_SHADD16,A_SHADD8,A_UHADD16,A_UHADD8,
+                A_SHSUB16,A_SHSUB8,A_UHSUB16,A_UHSUB8,
+                A_PKHTB,A_PKHBT,
+                A_SMUAD,A_SMUSD:
                   begin
                         {
                           optimize
@@ -1573,6 +1678,7 @@ Implementation
                             if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
                               begin
                                 DebugMsg('Peephole AndAnd2And done', p);
+                                AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
                                 taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
                                 taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
                                 taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
@@ -1583,12 +1689,12 @@ Implementation
                             else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
                               begin
                                 DebugMsg('Peephole AndAnd2And done', hp1);
+                                AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
                                 taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
                                 taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
                                 taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
                                 GetNextInstruction(p, hp1);
-                                asml.remove(p);
-                                p.free;
+                                RemoveCurrentP(p);
                                 p:=hp1;
                                 Result:=true;
                               end;
@@ -1613,9 +1719,9 @@ Implementation
                           begin
                             DebugMsg('Peephole AndStrb2Strb done', p);
                             taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
+                            AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
                             GetNextInstruction(p, hp1);
-                            asml.remove(p);
-                            p.free;
+                            RemoveCurrentP(p);
                             p:=hp1;
                             result:=true;
                           end
@@ -1872,6 +1978,7 @@ Implementation
                           begin
                             taicpu(hp1).opcode:=A_MLS;
 
+
                             taicpu(hp1).loadreg(3,taicpu(hp1).oper[1]^.reg);
 
                             if taicpu(hp1).ops=2 then
@@ -1882,11 +1989,12 @@ Implementation
                             taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
 
                             DebugMsg('MulSub2MLS done', p);
+                            AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,UsedRegs);
+                            AllocRegBetween(taicpu(hp1).oper[2]^.reg,p,hp1,UsedRegs);
+                            AllocRegBetween(taicpu(hp1).oper[3]^.reg,p,hp1,UsedRegs);
 
                             taicpu(hp1).ops:=4;
-
-                            asml.remove(p);
-                            p.free;
+                            RemoveCurrentP(p);
                             p:=hp1;
                           end;
 
@@ -1948,6 +2056,7 @@ Implementation
                       strb reg1,[...]
                     }
                     if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
+                      (taicpu(p).ops=2) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
                       assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
@@ -1973,6 +2082,7 @@ Implementation
                       uxtb reg3,reg1
                     }
                     else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
+                      (taicpu(p).ops=2) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
                       (taicpu(hp1).ops = 2) and
@@ -1982,6 +2092,7 @@ Implementation
                       not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
                       begin
                         DebugMsg('Peephole UxtbUxth2Uxtb done', p);
+                        AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
                         taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
                         asml.remove(hp1);
                         hp1.free;
@@ -1996,6 +2107,7 @@ Implementation
                       uxtb reg3,reg1
                     }
                     else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
+                      (taicpu(p).ops=2) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
                       (taicpu(hp1).ops = 2) and
@@ -2005,6 +2117,7 @@ Implementation
                       not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
                       begin
                         DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
+                        AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
                         taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
                         asml.remove(hp1);
                         hp1.free;
@@ -2019,8 +2132,8 @@ Implementation
                       uxtb reg3,reg1
                     }
                     else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
-                      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       (taicpu(p).ops=2) and
+                      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
                       (taicpu(hp1).ops=3) and
                       (taicpu(hp1).oper[2]^.typ=top_const) and
@@ -2055,6 +2168,7 @@ Implementation
                       strh reg1,[...]
                     }
                     if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
+                      (taicpu(p).ops=2) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
                       RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
@@ -2080,6 +2194,7 @@ Implementation
                       uxth reg3,reg1
                     }
                     else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
+                      (taicpu(p).ops=2) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
                       (taicpu(hp1).ops=2) and
@@ -2089,6 +2204,7 @@ Implementation
                       not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
                       begin
                         DebugMsg('Peephole UxthUxth2Uxth done', p);
+                        AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
                         taicpu(hp1).opcode:=A_UXTH;
                         taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
                         GetNextInstruction(p, hp1);
@@ -2106,6 +2222,7 @@ Implementation
                       uxth reg3,reg1
                     }
                     else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
+                      (taicpu(p).ops=2) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
                       (taicpu(hp1).ops=3) and
@@ -2213,6 +2330,31 @@ Implementation
                         DebugMsg('Peephole Bl2B done', p);
                       end;
                   end;
+                A_VMOV:
+                  begin
+                    {
+                      change
+                      vmov reg0,reg1,reg2
+                      vmov reg1,reg2,reg0
+                      into
+                      vmov reg0,reg1,reg2
+
+                      can be applied regardless if reg0 or reg2 is the vfp register
+                    }
+                    if (taicpu(p).ops = 3) and
+                      GetNextInstruction(p, hp1) and
+                      MatchInstruction(hp1, A_VMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
+                      (taicpu(hp1).ops = 3) and
+                      MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[2]^) and
+                      MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[0]^) and
+                      MatchOperand(taicpu(p).oper[2]^, taicpu(hp1).oper[1]^) then
+                      begin
+                        asml.Remove(hp1);
+                        hp1.free;
+                        DebugMsg('Peephole VMovVMov2VMov done', p);
+                      end;
+                  end;
+                A_VLDR,
                 A_VADD,
                 A_VMUL,
                 A_VDIV,
@@ -2226,8 +2368,12 @@ Implementation
                       RemoveSuperfluousVMov(p, hp1, 'VOpVMov2VOp') then
                       Result:=true;
                   end
+                else
+                  ;
               end;
           end;
+        else
+          ;
       end;
     end;
 
@@ -2277,7 +2423,9 @@ Implementation
                            (l<=4) and
                            CanBeCond(hp1) and
                            { stop on labels }
-                           not(hp1.typ=ait_label) do
+                           not(hp1.typ=ait_label) and
+                           { avoid that we cannot recognize the case BccB2Cond }
+                           not((hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_B)) do
                            begin
                               inc(l);
                               if MustBeLast(hp1) then
@@ -2312,6 +2460,7 @@ Implementation
                                       until not(assigned(hp1)) or
                                         not(CanBeCond(hp1)) or
                                         (hp1.typ=ait_label);
+                                      DebugMsg('Peephole Bcc2Cond done',hp2);
                                       { wait with removing else GetNextInstruction could
                                         ignore the label if it was the only usage in the
                                         jump moved away }
@@ -2347,17 +2496,24 @@ Implementation
                                     (taicpu(hp2).condition=C_None) and
                                     { real label and jump, no further references to the
                                       label are allowed }
-                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
+                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
                                     FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                      begin
                                        l:=0;
                                        { skip hp1 to <several moves 2> }
                                        GetNextInstruction(hp1, hp1);
                                        while assigned(hp1) and
-                                         CanBeCond(hp1) do
+                                         CanBeCond(hp1) and
+                                         (l<=3) do
                                          begin
                                            inc(l);
-                                           GetNextInstruction(hp1, hp1);
+                                           if MustBeLast(hp1) then
+                                             begin
+                                               GetNextInstruction(hp1, hp1);
+                                               break;
+                                             end
+                                           else
+                                             GetNextInstruction(hp1, hp1);
                                          end;
                                        { hp1 points to yyy: }
                                        if assigned(hp1) and
@@ -2370,32 +2526,35 @@ Implementation
                                             repeat
                                               if hp1.typ=ait_instruction then
                                                 taicpu(hp1).condition:=condition;
-                                              GetNextInstruction(hp1,hp1);
+                                              if MustBeLast(hp1) then
+                                                begin
+                                                  GetNextInstruction(hp1, hp1);
+                                                  break;
+                                                end
+                                              else
+                                                GetNextInstruction(hp1, hp1);
                                             until not(assigned(hp1)) or
-                                              not(CanBeCond(hp1));
+                                              not(CanBeCond(hp1)) or
+                                              ((hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_B));
                                             { hp2 is still at jmp yyy }
                                             GetNextInstruction(hp2,hp1);
-                                            { hp2 is now at xxx: }
+                                            { hp1 is now at xxx: }
                                             condition:=inverse_cond(condition);
                                             GetNextInstruction(hp1,hp1);
                                             { hp1 is now at <several movs 2> }
                                             repeat
-                                              taicpu(hp1).condition:=condition;
+                                              if hp1.typ=ait_instruction then
+                                                taicpu(hp1).condition:=condition;
                                               GetNextInstruction(hp1,hp1);
                                             until not(assigned(hp1)) or
                                               not(CanBeCond(hp1)) or
                                               (hp1.typ=ait_label);
-                                            {
-                                            asml.remove(hp1.next)
-                                            hp1.next.free;
-                                            asml.remove(hp1);
-                                            hp1.free;
-                                            }
+                                            DebugMsg('Peephole BccB2Cond done',hp3);
                                             { remove Bcc }
                                             tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                             asml.remove(hp3);
                                             hp3.free;
-                                            { remove jmp }
+                                            { remove B }
                                             tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                             asml.remove(hp2);
                                             hp2.free;
@@ -2405,8 +2564,12 @@ Implementation
                                 end;
                            end;
                       end;
+                  else
+                    ;
                 end;
               end;
+            else
+              ;
           end;
           p := tai(p.next)
         end;
@@ -2443,7 +2606,7 @@ Implementation
           exit;
         regtype:=getregtype(reg);
         supreg:=getsupreg(reg);
-        if (cg.rg[regtype].live_end[supreg]=hp1) and
+        if assigned(cg.rg[regtype]) and (cg.rg[regtype].live_end[supreg]=hp1) and
           RegInInstruction(reg,p) then
           cg.rg[regtype].live_end[supreg]:=p;
       end;
@@ -2458,7 +2621,7 @@ Implementation
           exit;
         regtype:=getregtype(reg);
         supreg:=getsupreg(reg);
-        if (cg.rg[regtype].live_start[supreg]=p) and
+        if assigned(cg.rg[regtype]) and (cg.rg[regtype].live_start[supreg]=p) and
           RegInInstruction(reg,hp1) then
          cg.rg[regtype].live_start[supreg]:=hp1;
       end;
@@ -2486,6 +2649,8 @@ Implementation
             for r:=RS_R0 to RS_R15 do
                if r in p.oper[i]^.regset^ then
                  CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
+          else
+            ;
         end;
 
       { if live of any reg used by hp1 ends at hp1 and p uses this register then
@@ -2505,6 +2670,8 @@ Implementation
             for r:=RS_R0 to RS_R15 do
                if r in hp1.oper[i]^.regset^ then
                  CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
+          else
+            ;
         end;
     end;
 
@@ -2513,6 +2680,15 @@ Implementation
 
   { TODO : schedule also forward }
   { TODO : schedule distance > 1 }
+
+    { returns true if p might be a load of a pc relative tls offset }
+    function PossibleTLSLoad(const p: tai) : boolean;
+      begin
+        Result:=(p.typ=ait_instruction) and (taicpu(p).opcode=A_LDR) and (taicpu(p).oper[1]^.typ=top_ref) and (((taicpu(p).oper[1]^.ref^.base=NR_PC) and
+          (taicpu(p).oper[1]^.ref^.index<>NR_NO)) or ((taicpu(p).oper[1]^.ref^.base<>NR_NO) and
+          (taicpu(p).oper[1]^.ref^.index=NR_PC)));
+      end;
+
     var
       hp1,hp2,hp3,hp4,hp5,insertpos : tai;
       list : TAsmList;
@@ -2553,7 +2729,11 @@ Implementation
             ) and
             GetNextInstruction(hp1,hp2) and
             (hp2.typ=ait_instruction) and
-            { loaded register used by next instruction? }
+            { loaded register used by next instruction?
+
+              if we ever support labels (they could be skipped in theory) here, the gnu2 tls general-dynamic code could get broken (the ldr before
+              the bl may not be scheduled away from the bl) and it needs to be taken care of this case
+            }
             (RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
             { loaded register not used by previous instruction? }
             not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and
@@ -2569,7 +2749,9 @@ Implementation
             ) and
             { if we modify the basereg AND the first instruction used that reg, we can not schedule }
             ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) or
-             not(instructionLoadsFromReg(taicpu(hp1).oper[1]^.ref^.base,p))) then
+             not(instructionLoadsFromReg(taicpu(hp1).oper[1]^.ref^.base,p))) and
+            not(PossibleTLSLoad(p)) and
+            not(PossibleTLSLoad(hp1)) then
             begin
               hp3:=tai(p.Previous);
               hp5:=tai(p.next);
@@ -2690,7 +2872,11 @@ Implementation
                       A_ITETT:
                         if l=4 then taicpu(hp).opcode := A_ITET;
                       A_ITTTT:
-                        if l=4 then taicpu(hp).opcode := A_ITTT;
+                        begin
+                          if l=4 then taicpu(hp).opcode := A_ITTT;
+                        end
+                      else
+                        ;
                     end;
 
                   break;
@@ -2921,8 +3107,12 @@ Implementation
                                 end;
                            end;
                       end;
+                  else
+                    ;
                 end;
               end;
+            else
+              ;
           end;
           p := tai(p.next)
         end;
@@ -3073,6 +3263,8 @@ Implementation
                 SM_LSR: taicpu(p).opcode:=A_LSR;
                 SM_ASR: taicpu(p).opcode:=A_ASR;
                 SM_ROR: taicpu(p).opcode:=A_ROR;
+                else
+                  internalerror(2019050912);
               end;
 
               if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then

+ 19 - 6
compiler/arm/aoptcpub.pas

@@ -76,10 +76,6 @@ Const
 
   MaxCh = 3;
 
-{ the maximum number of operands an instruction has }
-
-  MaxOps = 4;
-
 {Oper index of operand that contains the source (reference) with a load }
 {instruction                                                            }
 
@@ -123,14 +119,31 @@ Implementation
       i : Longint;
     begin
       result:=false;
+      case taicpu(p1).opcode of
+        A_LDR:
+          begin
+            { special handling for LDRD }
+            if (taicpu(p1).oppostfix=PF_D) and (getsupreg(taicpu(p1).oper[0]^.reg)+1=getsupreg(Reg)) then
+              begin
+                result:=true;
+                exit;
+              end;
+          end;
+        else
+          ;
+      end;
       for i:=0 to taicpu(p1).ops-1 do
         case taicpu(p1).oper[i]^.typ of
           top_reg:
             if (taicpu(p1).oper[i]^.reg=Reg) and (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
               exit(true);
           top_ref:
-            if (taicpu(p1).spilling_get_operation_type_ref(i,Reg)<>operand_read) then
-              exit(true);
+            begin
+              if (taicpu(p1).spilling_get_operation_type_ref(i,Reg)<>operand_read) then
+                exit(true);
+            end
+          else
+            ;
         end;
     end;
 

+ 1 - 0
compiler/arm/armatt.inc

@@ -324,6 +324,7 @@
 'svc',
 'bxj',
 'udf',
+'veor',
 'tan',
 'sqt',
 'suf',

+ 1 - 0
compiler/arm/armatts.inc

@@ -353,5 +353,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
 attsufNONE
 );

+ 15 - 1
compiler/arm/armins.dat

@@ -321,6 +321,7 @@ reg32,memam2              \x17\x04\x50                   ARM32,ARMv4
 reglo,memam3              \x65\x58\x0\2                  THUMB,ARMv4T
 reglo,memam4              \x66\x68\x0\2                  THUMB,ARMv4T
 reglo,memam5              \x67\x98\x0\2                  THUMB,ARMv4T
+reglo,memam2              \x67\x98\x0\2                  THUMB,ARMv4T
 reglo,memam6              \x67\x48\x0\2                  THUMB,ARMv4T
 reg32,memam2              \x88\xF8\x50\x0\x0\0           THUMB32,WIDE,ARMv6T2
 reg32,memam2              \x17\x04\x10                   ARM32,ARMv4
@@ -402,6 +403,7 @@ reg32,regf          \x10\x01\x0F                        ARM32,ARMv4
 regf,reg32          \x96\xF3\x80\x80\x0                 THUMB32,ARMv6
 
 regf,reg32          \x12\x01\x20\xF0                    ARM32,ARMv4
+regs,reg32          \x12\x01\x20\xF0                    ARM32,ARMv4
 regf,immshifter     \x13\x03\x20\xF0                    ARM32,ARMv4
 regs,immshifter     \x13\x03\x20\xF0                    ARM32,ARMv4
 
@@ -428,7 +430,9 @@ reg32,immshifter       \xB\x1\xE0                       ARM32,ARMv4
 
 [VMOVcc]
 vreg,vreg         \x90\xEE\xB0\xA\x40            THUMB32,VFPv2
-vreg,vreg         \x40\xE\xB0\xA\x40            ARM32,VFPv2
+vreg,vreg         \x40\xE\xB0\xA\x40             ARM32,VFPv2
+vreg,immmm         \x90\xEE\xB0\xA\x0             THUMB32,VFPv3
+vreg,immmm         \x40\xE\xB0\xA\x0              ARM32,VFPv3
 
 reg32,vreg        \x90\xEE\x10\xA\x10            THUMB32,VFPv2
 vreg,reg32        \x90\xEE\x00\xA\x10            THUMB32,VFPv2
@@ -540,6 +544,7 @@ reg32,reglist		          \x26\x80			   ARM32,ARMv4
 reglo,memam3                \x65\x50\x0\2                  THUMB,ARMv4T
 reglo,memam4                \x66\x60\x0\2                  THUMB,ARMv4T
 reglo,memam5                \x67\x90\x0\2                  THUMB,ARMv4T
+reglo,memam2                \x67\x90\x0\2                  THUMB,ARMv4T
 reg32,memam2                \x88\xF8\x40\x0\x0\0           THUMB32,WIDE,ARMv6T2
 reg32,memam2                \x17\x04\x00                   ARM32,ARMv4
 
@@ -1481,8 +1486,12 @@ vreg,vreg               \x43\xEE\xB8\xA\x40          THUMB32,VFPv2
 vreg,vreg               \x43\xE\xB8\xA\x40           ARM32,VFPv2
 
 [FMDRRcc]
+vreg,reg32,reg32        \x90\xEC\x40\xB\x10          THUMB32,VFPv2
+vreg,reg32,reg32        \x40\xC\x40\xB\x10           ARM32,VFPv2
 
 [FMRRDcc]
+reg32,reg32,vreg        \x90\xEC\x50\xB\x10          THUMB32,VFPv2
+reg32,reg32,vreg        \x40\xC\x50\xB\x10           ARM32,VFPv2
 
 ; Thumb-2
 
@@ -1722,6 +1731,11 @@ reg32              \x3\x01\x2F\xFF\x20             ARM32,ARMv5TEJ
 immshifter           \x61\xDE\x0                   THUMB,ARMv4T
 void                 void                          ARM32,ARMv4T
 
+; NEON/Advanced SIMD
+
+[VEOR]
+vreg,vreg,vreg              \x42\xF3\x00\x01\x10   ARM32,NEON
+
 ; FPA
 
 

+ 1 - 1
compiler/arm/armnop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from armins.dat }
-952;
+962;

+ 1 - 0
compiler/arm/armop.inc

@@ -324,6 +324,7 @@ A_NEG,
 A_SVC,
 A_BXJ,
 A_UDF,
+A_VEOR,
 A_TAN,
 A_SQT,
 A_SUF,

+ 70 - 0
compiler/arm/armtab.inc

@@ -1043,6 +1043,13 @@
     code    : #103#152#0#2;
     flags   : if_thumb or if_armv4t
   ),
+  (
+    opcode  : A_LDR;
+    ops     : 2;
+    optypes : (ot_reglo,ot_memoryam2,ot_none,ot_none,ot_none,ot_none);
+    code    : #103#152#0#2;
+    flags   : if_thumb or if_armv4t
+  ),
   (
     opcode  : A_LDR;
     ops     : 2;
@@ -1351,6 +1358,13 @@
     code    : #18#1#32#240;
     flags   : if_arm32 or if_armv4
   ),
+  (
+    opcode  : A_MSR;
+    ops     : 2;
+    optypes : (ot_regs,ot_reg32,ot_none,ot_none,ot_none,ot_none);
+    code    : #18#1#32#240;
+    flags   : if_arm32 or if_armv4
+  ),
   (
     opcode  : A_MSR;
     ops     : 2;
@@ -1470,6 +1484,20 @@
     code    : #64#14#176#10#64;
     flags   : if_arm32 or if_vfpv2
   ),
+  (
+    opcode  : A_VMOV;
+    ops     : 2;
+    optypes : (ot_vreg,ot_immediatemm,ot_none,ot_none,ot_none,ot_none);
+    code    : #144#238#176#10#0;
+    flags   : if_thumb32 or if_vfpv3
+  ),
+  (
+    opcode  : A_VMOV;
+    ops     : 2;
+    optypes : (ot_vreg,ot_immediatemm,ot_none,ot_none,ot_none,ot_none);
+    code    : #64#14#176#10#0;
+    flags   : if_arm32 or if_vfpv3
+  ),
   (
     opcode  : A_VMOV;
     ops     : 2;
@@ -1995,6 +2023,13 @@
     code    : #103#144#0#2;
     flags   : if_thumb or if_armv4t
   ),
+  (
+    opcode  : A_STR;
+    ops     : 2;
+    optypes : (ot_reglo,ot_memoryam2,ot_none,ot_none,ot_none,ot_none);
+    code    : #103#144#0#2;
+    flags   : if_thumb or if_armv4t
+  ),
   (
     opcode  : A_STR;
     ops     : 2;
@@ -5439,6 +5474,34 @@
     code    : #67#14#184#10#64;
     flags   : if_arm32 or if_vfpv2
   ),
+  (
+    opcode  : A_FMDRR;
+    ops     : 3;
+    optypes : (ot_vreg,ot_reg32,ot_reg32,ot_none,ot_none,ot_none);
+    code    : #144#236#64#11#16;
+    flags   : if_thumb32 or if_vfpv2
+  ),
+  (
+    opcode  : A_FMDRR;
+    ops     : 3;
+    optypes : (ot_vreg,ot_reg32,ot_reg32,ot_none,ot_none,ot_none);
+    code    : #64#12#64#11#16;
+    flags   : if_arm32 or if_vfpv2
+  ),
+  (
+    opcode  : A_FMRRD;
+    ops     : 3;
+    optypes : (ot_reg32,ot_reg32,ot_vreg,ot_none,ot_none,ot_none);
+    code    : #144#236#80#11#16;
+    flags   : if_thumb32 or if_vfpv2
+  ),
+  (
+    opcode  : A_FMRRD;
+    ops     : 3;
+    optypes : (ot_reg32,ot_reg32,ot_vreg,ot_none,ot_none,ot_none);
+    code    : #64#12#80#11#16;
+    flags   : if_arm32 or if_vfpv2
+  ),
   (
     opcode  : A_POP;
     ops     : 1;
@@ -6279,6 +6342,13 @@
     code    : #0;
     flags   : if_arm32 or if_armv4t
   ),
+  (
+    opcode  : A_VEOR;
+    ops     : 3;
+    optypes : (ot_vreg,ot_vreg,ot_vreg,ot_none,ot_none,ot_none);
+    code    : #66#243#0#1#16;
+    flags   : if_arm32 or if_neon
+  ),
   (
     opcode  : A_TAN;
     ops     : 2;

+ 261 - 136
compiler/arm/cgcpu.pas

@@ -42,7 +42,9 @@ unit cgcpu;
         cgsetflags : boolean;
 
         procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);override;
-        procedure a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);override;
+       protected
+         procedure a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation); override;
+       public
         procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);override;
 
         procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
@@ -59,6 +61,8 @@ unit cgcpu;
         procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
         procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
 
+        procedure g_check_for_fpu_exception(list : TAsmList; force,clear : boolean); override;
+
         procedure a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const paraloc : TCGPara);override;
         {  comparison operations }
         procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
@@ -107,13 +111,15 @@ unit cgcpu;
         { try to generate optimized 32 Bit multiplication, returns true if successful generated }
         function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
 
-        { clear out potential overflow bits from 8 or 16 bit operations  }
-        { the upper 24/16 bits of a register after an operation          }
+        { clear out potential overflow bits from 8 or 16 bit operations
+          the upper 24/16 bits of a register after an operation          }
         procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
 
         { mla for thumb requires that none of the registers is equal to r13/r15, this method ensures this }
         procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister);
 
+
+        procedure g_maybe_tls_init(list : TAsmList); override;
       end;
 
       { tcgarm is shared between normal arm and thumb-2 }
@@ -241,6 +247,10 @@ unit cgcpu;
        procinfo,cpupi,
        paramgr;
 
+{ Range check must be disabled explicitly as conversions between signed and unsigned
+  32-bit values are done without explicit typecasts }
+{$R-}
+
 
     function get_fpu_postfix(def : tdef) : toppostfix;
       begin
@@ -290,7 +300,7 @@ unit cgcpu;
           non-overlapping subregs per register, so we can only use
           half the single precision registers for now (as sub registers of the
           double precision ones). }
-        if current_settings.fputype in [fpu_vfpv3,fpu_vfpv4] then
+        if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
           rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
               [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
                RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
@@ -405,7 +415,7 @@ unit cgcpu;
                      begin
                        tmpreg2:=getintregister(list,OS_INT);
                        a_loadaddr_ref_reg(list,ref,tmpreg2);
-                       reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment,ref.volatility);
+                       reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                      end
                    else
                      usedtmpref:=ref;
@@ -437,7 +447,7 @@ unit cgcpu;
                      begin
                        tmpreg2:=getintregister(list,OS_INT);
                        a_loadaddr_ref_reg(list,ref,tmpreg2);
-                       reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment,ref.volatility);
+                       reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                      end
                    else
                      usedtmpref:=ref;
@@ -526,7 +536,7 @@ unit cgcpu;
                   begin
                     { offset in the wrapper needs to be adjusted for the stored
                       return address }
-                    reference_reset_base(href,reference.index,reference.offset+sizeof(aint),sizeof(pint),[]);
+                    reference_reset_base(href,reference.index,reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                     if is_shifter_const(ioffset,shift) then
                       a_op_const_ref(list,OP_SUB,size,ioffset,href)
                     else
@@ -565,52 +575,16 @@ unit cgcpu;
       end;
 
 
-    procedure tbasecgarm.a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);
-      var
-        tmpref, ref: treference;
-        location: pcgparalocation;
-        sizeleft: aint;
+    procedure tbasecgarm.a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation);
       begin
-        location := paraloc.location;
-        tmpref := r;
-        sizeleft := paraloc.intsize;
-        while assigned(location) do
+        { doubles in softemu mode have a strange order of registers and references }
+        if (cgpara.size=OS_F64) and
+           (location^.size=OS_32) then
           begin
-            paramanager.allocparaloc(list,location);
-            case location^.loc of
-              LOC_REGISTER,LOC_CREGISTER:
-                a_load_ref_reg(list,location^.size,location^.size,tmpref,location^.register);
-              LOC_REFERENCE:
-                begin
-                  reference_reset_base(ref,location^.reference.index,location^.reference.offset,paraloc.alignment,[]);
-                  { doubles in softemu mode have a strange order of registers and references }
-                  if location^.size=OS_32 then
-                    g_concatcopy(list,tmpref,ref,4)
-                  else
-                    begin
-                      g_concatcopy(list,tmpref,ref,sizeleft);
-                      if assigned(location^.next) then
-                        internalerror(2005010710);
-                    end;
-                end;
-              LOC_FPUREGISTER,LOC_CFPUREGISTER:
-                case location^.size of
-                   OS_F32, OS_F64:
-                     a_loadfpu_ref_reg(list,location^.size,location^.size,tmpref,location^.register);
-                   else
-                     internalerror(2002072801);
-                end;
-              LOC_VOID:
-                begin
-                  // nothing to do
-                end;
-              else
-                internalerror(2002081103);
-            end;
-            inc(tmpref.offset,tcgsize2size[location^.size]);
-            dec(sizeleft,tcgsize2size[location^.size]);
-            location := location^.next;
-          end;
+            g_concatcopy(list,ref,paralocref,4)
+          end
+        else
+          inherited;
       end;
 
 
@@ -645,15 +619,9 @@ unit cgcpu;
         r : treference;
         sym : TAsmSymbol;
       begin
-        { check not really correct: should only be used for non-Thumb cpus }
-        // if (CPUARM_HAS_BLX_LABEL in cpu_capabilities[current_settings.cputype]) and
-        //   { WinCE GNU AS (not sure if this applies in general) does not support BLX imm }
-        // (target_info.system<>system_arm_wince) then
-        //   branchopcode:=A_BLX
-        // else
         { use always BL as newer binutils do not translate blx apparently
           generating BL is also what clang and gcc do by default }
-          branchopcode:=A_BL;
+        branchopcode:=A_BL;
         if not(weak) then
           sym:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION)
         else
@@ -932,9 +900,11 @@ unit cgcpu;
               a_load_const_reg(list, size, a, dst);
               exit;
             end;
+          else
+            ;
         end;
         ovloc.loc:=LOC_VOID;
-        if {$ifopt R+}(a<>-2147483648) and{$endif} not setflags and is_shifter_const(-a,shift) then
+        if (a<>-2147483648) and not setflags and is_shifter_const(-a,shift) then
           case op of
             OP_ADD:
               begin
@@ -946,6 +916,8 @@ unit cgcpu;
                 op:=OP_ADD;
                 a:=aint(dword(-a));
               end
+            else
+              ;
           end;
 
         if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
@@ -994,6 +966,8 @@ unit cgcpu;
                       ovloc.resflags:=F_CS;
                     OP_SUB:
                       ovloc.resflags:=F_CC;
+                    else
+                      internalerror(2019050922);
                   end;
                 end;
           end
@@ -1037,7 +1011,7 @@ unit cgcpu;
             { Doing two shifts instead of two bics might allow the peephole optimizer to fold the second shift
               into the following instruction}
             else if (op = OP_AND) and
-                    is_continuous_mask(a, lsb, width) and
+                    is_continuous_mask(aword(a), lsb, width) and
                     ((lsb = 0) or ((lsb + width) = 32)) then
               begin
                 shifterop_reset(so);
@@ -1689,7 +1663,7 @@ unit cgcpu;
                 end;
               LOC_REFERENCE :
                 begin
-                  reference_reset_base(href2,hloc^.reference.index,hloc^.reference.offset,paraloc.alignment,[]);
+                  reference_reset_base(href2,hloc^.reference.index,hloc^.reference.offset,ctempposinvalid,paraloc.alignment,[]);
                   { concatcopy should choose the best way to copy the data }
                   g_concatcopy(list,href,href2,tcgsize2size[hloc^.size]);
                 end;
@@ -1748,6 +1722,34 @@ unit cgcpu;
        end;
 
 
+    procedure tbasecgarm.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
+      var
+        r : TRegister;
+        ai: taicpu;
+        l: TAsmLabel;
+      begin
+        if ((cs_check_fpu_exceptions in current_settings.localswitches) and
+            not(FPUARM_HAS_EXCEPTION_TRAPPING in fpu_capabilities[current_settings.fputype]) and
+            (force or current_procinfo.FPUExceptionCheckNeeded)) then
+          begin
+            r:=getintregister(list,OS_INT);
+            list.concat(taicpu.op_reg_reg(A_FMRX,r,NR_FPSCR));
+            list.concat(setoppostfix(taicpu.op_reg_reg_const(A_AND,r,r,$9f),PF_S));
+            current_asmdata.getjumplabel(l);
+            ai:=taicpu.op_sym(A_B,l);
+            ai.is_jmp:=true;
+            ai.condition:=C_EQ;
+            list.concat(ai);
+            alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+            cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
+            dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+            a_label(list,l);
+            if clear then
+              current_procinfo.FPUExceptionCheckNeeded:=false;
+          end;
+      end;
+
+
     {  comparison operations }
     procedure tbasecgarm.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
       l : tasmlabel);
@@ -1905,6 +1907,10 @@ unit cgcpu;
             firstfloatreg:=RS_NO;
             mmregs:=[];
             case current_settings.fputype of
+              fpu_none,
+              fpu_soft,
+              fpu_libgcc:
+                ;
               fpu_fpa,
               fpu_fpa10,
               fpu_fpa11:
@@ -1920,16 +1926,22 @@ unit cgcpu;
                         inc(registerarea,12);
                       end;
                 end;
-              fpu_vfpv2,
-              fpu_vfpv3,
-              fpu_vfpv4,
-              fpu_vfpv3_d16:
+              else if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
                 begin;
                   { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
                     they have numbers>$1f which is not really correct as they should simply have the same numbers
                     as the even ones by with a different subtype as it is done on x86 with al/ah }
                   mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
-                end;
+                end
+              else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
+                begin;
+                  { the *[0..15] is a hack to prevent that the compiler tries to save odd single-type registers,
+                    they have numbers>$1f which is not really correct as they should simply have the same numbers
+                    as the even ones by with a different subtype as it is done on x86 with al/ah }
+                  mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..15];
+                end
+              else
+                internalerror(2019050924);
             end;
             a_reg_alloc(list,NR_STACK_POINTER_REG);
             if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
@@ -2072,7 +2084,7 @@ unit cgcpu;
              begin
                reference_reset(ref,4,[]);
                if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
-                  (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16]) then
+                 (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) then
                  begin
                    if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                      begin
@@ -2099,10 +2111,7 @@ unit cgcpu;
                      list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                        lastfloatreg-firstfloatreg+1,ref));
                    end;
-                 fpu_vfpv2,
-                 fpu_vfpv3,
-                 fpu_vfpv4,
-                 fpu_vfpv3_d16:
+                 else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                    begin
                      ref.index:=ref.base;
                      ref.base:=NR_NO;
@@ -2113,10 +2122,12 @@ unit cgcpu;
                        postfix:=PF_IAD;}
                      if mmregs<>[] then
                        list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
-                   end;
+                   end
+                 else
+                   internalerror(2019050923);
                end;
              end;
-        end;
+          end;
       end;
 
 
@@ -2143,6 +2154,10 @@ unit cgcpu;
             mmregs:=[];
             saveregs:=[];
             case current_settings.fputype of
+              fpu_none,
+              fpu_soft,
+              fpu_libgcc:
+                ;
               fpu_fpa,
               fpu_fpa10,
               fpu_fpa11:
@@ -2161,17 +2176,16 @@ unit cgcpu;
                         }
                       end;
                 end;
-              fpu_vfpv2,
-              fpu_vfpv3,
-              fpu_vfpv4,
-              fpu_vfpv3_d16:
-                begin;
+              else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
+                begin
                   { restore vfp registers? }
                   { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
                     they have numbers>$1f which is not really correct as they should simply have the same numbers
                     as the even ones by with a different subtype as it is done on x86 with al/ah }
                   mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
-                end;
+                end
+              else
+                internalerror(2019050926);
             end;
 
             if (firstfloatreg<>RS_NO) or
@@ -2179,7 +2193,7 @@ unit cgcpu;
               begin
                 reference_reset(ref,4,[]);
                 if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
-                   (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16]) then
+                   (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) then
                   begin
                     if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                       begin
@@ -2205,10 +2219,7 @@ unit cgcpu;
                       list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
                         lastfloatreg-firstfloatreg+1,ref));
                     end;
-                  fpu_vfpv2,
-                  fpu_vfpv3,
-                  fpu_vfpv4,
-                  fpu_vfpv3_d16:
+                  else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                     begin
                       ref.index:=ref.base;
                       ref.base:=NR_NO;
@@ -2219,7 +2230,9 @@ unit cgcpu;
                         mmpostfix:=PF_IAD;}
                      if mmregs<>[] then
                        list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
-                    end;
+                    end
+                  else
+                    internalerror(2019050921);
                 end;
               end;
 
@@ -2478,6 +2491,19 @@ unit cgcpu;
                     a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
                 indirection_done:=true;
               end
+            else if ref.refaddr=addr_gottpoff then
+              current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
+            else if ref.refaddr=addr_tlsgd then
+              current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset))
+            else if ref.refaddr=addr_tlsdesc then
+              current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset))
+            else if ref.refaddr=addr_tpoff then
+              begin
+                if assigned(ref.relsymbol) or (ref.offset<>0) then
+                  Internalerror(2019092804);
+
+                current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
+              end
             else if (cs_create_pic in current_settings.moduleswitches) then
               if (tf_pic_uses_got in target_info.flags) then
                 current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
@@ -2518,6 +2544,11 @@ unit cgcpu;
                (tf_pic_uses_got in target_info.flags) and
                assigned(ref.symbol) then
               begin
+                {$ifdef EXTDEBUG}
+                if not (pi_needs_got in current_procinfo.flags) then
+                	Comment(V_warning,'pi_needs_got not included');
+                {$endif EXTDEBUG}
+                Include(current_procinfo.flags,pi_needs_got);
                 reference_reset(tmpref,4,[]);
                 tmpref.base:=current_procinfo.got;
                 tmpref.index:=tmpreg;
@@ -2580,9 +2611,9 @@ unit cgcpu;
         paraloc1.init;
         paraloc2.init;
         paraloc3.init;
-        paramanager.getintparaloc(list,pd,1,paraloc1);
-        paramanager.getintparaloc(list,pd,2,paraloc2);
-        paramanager.getintparaloc(list,pd,3,paraloc3);
+        paramanager.getcgtempparaloc(list,pd,1,paraloc1);
+        paramanager.getcgtempparaloc(list,pd,2,paraloc2);
+        paramanager.getcgtempparaloc(list,pd,3,paraloc3);
         a_load_const_cgpara(list,OS_SINT,len,paraloc3);
         a_loadaddr_ref_cgpara(list,dest,paraloc2);
         a_loadaddr_ref_cgpara(list,source,paraloc1);
@@ -2684,6 +2715,21 @@ unit cgcpu;
           list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
         end;
 
+      { save estimation, if a creating a separate ref is needed or
+        if we can keep the original reference while copying }
+      function SimpleRef(const ref : treference) : boolean;
+        begin
+          result:=((ref.base=NR_PC) and (ref.addressmode=AM_OFFSET) and (ref.refaddr in [addr_full,addr_no])) or
+              ((ref.symbol=nil) and
+               (ref.addressmode=AM_OFFSET) and
+               (((ref.offset>=0) and (ref.offset+len<=31)) or
+                (not(GenerateThumbCode) and (ref.offset>=-255) and (ref.offset+len<=255)) or
+                { ldrh has a limited offset range }
+                (not(GenerateThumbCode) and ((len mod 4) in [0,1]) and (ref.offset>=-4095) and (ref.offset+len<=4095))
+               )
+              );
+        end;
+
       { will never be called with count<=4 }
       procedure genloop_thumb(count : aword;size : byte);
 
@@ -2790,19 +2836,17 @@ unit cgcpu;
           begin
             tmpregi:=0;
 
-            srcreg:=getintregister(list,OS_ADDR);
-
-            { explicit pc relative addressing, could be
-              e.g. a floating point constant }
-            if source.base=NR_PC then
+            { loading address in a separate register needed? }
+            if SimpleRef(source) then
               begin
                 { ... then we don't need a loadaddr }
                 srcref:=source;
               end
             else
               begin
+                srcreg:=getintregister(list,OS_ADDR);
                 a_loadaddr_ref_reg(list,source,srcreg);
-                reference_reset_base(srcref,srcreg,0,source.alignment,source.volatility);
+                reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);
               end;
 
             while (len div 4 <> 0) and (tmpregi<maxtmpreg) do
@@ -2814,9 +2858,15 @@ unit cgcpu;
                 dec(len,4);
               end;
 
-            destreg:=getintregister(list,OS_ADDR);
-            a_loadaddr_ref_reg(list,dest,destreg);
-            reference_reset_base(dstref,destreg,0,dest.alignment,dest.volatility);
+            { loading address in a separate register needed? }
+            if SimpleRef(dest) then
+              dstref:=dest
+            else
+              begin
+                destreg:=getintregister(list,OS_ADDR);
+                a_loadaddr_ref_reg(list,dest,destreg);
+                reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
+              end;
             tmpregi2:=1;
             while (tmpregi2<=tmpregi) do
               begin
@@ -2884,11 +2934,11 @@ unit cgcpu;
               begin{unaligned & 4<len<helpsize **or** aligned/unaligned & len>helpsize}
                 destreg:=getintregister(list,OS_ADDR);
                 a_loadaddr_ref_reg(list,dest,destreg);
-                reference_reset_base(dstref,destreg,0,dest.alignment,dest.volatility);
+                reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
 
                 srcreg:=getintregister(list,OS_ADDR);
                 a_loadaddr_ref_reg(list,source,srcreg);
-                reference_reset_base(srcref,srcreg,0,source.alignment,source.volatility);
+                reference_reset_base(srcref,srcreg,0,dest.temppos,source.alignment,source.volatility);
 
                 countreg:=getintregister(list,OS_32);
 
@@ -2948,7 +2998,7 @@ unit cgcpu;
               if not((def.typ=pointerdef) or
                     ((def.typ=orddef) and
                      (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
-                                               pasbool8,pasbool16,pasbool32,pasbool64]))) then
+                                               pasbool1,pasbool8,pasbool16,pasbool32,pasbool64]))) then
                  ai.SetCondition(C_VC)
               else
                 if TAiCpu(List.Last).opcode in [A_RSB,A_RSC,A_SBC,A_SUB] then
@@ -3051,7 +3101,11 @@ unit cgcpu;
         list.concat(instr);
         case instr.opcode of
           A_VMOV:
+            { VMOV cannot generate an FPU exception, so we do not need a check here }
             add_move_instruction(instr);
+          else
+            { VCVT can generate an exception }
+            maybe_check_for_fpu_exception(list);
         end;
       end;
 
@@ -3081,6 +3135,10 @@ unit cgcpu;
               if (fromsize<>tosize) then
                 internalerror(2009112901);
             end;
+          OS_F32,OS_F64:
+            ;
+          else
+            internalerror(2019050920);
         end;
 
         if (fromsize<>tosize) then
@@ -3108,9 +3166,7 @@ unit cgcpu;
             end;
           end
         else
-          begin
-             handle_load_store(list,A_VLDR,PF_None,tmpmmreg,ref);
-          end;
+          handle_load_store(list,A_VLDR,PF_None,tmpmmreg,ref);
 
         if (tmpmmreg<>reg) then
           a_loadmm_reg_reg(list,fromsize,tosize,tmpmmreg,reg,shuffle);
@@ -3142,6 +3198,10 @@ unit cgcpu;
               if (fromsize<>tosize) then
                 internalerror(2009112901);
             end;
+          OS_F32,OS_F64:
+            ;
+          else
+            internalerror(2019050919);
         end;
 
         if (fromsize<>tosize) then
@@ -3172,9 +3232,8 @@ unit cgcpu;
             end;
           end
         else
-          begin
-             handle_load_store(list,A_VSTR,PF_None,tmpmmreg,ref);
-          end;
+          handle_load_store(list,A_VSTR,PF_None,tmpmmreg,ref);
+        { VSTR cannot generate an FPU exception, VCVT is handled seperately, so we do not need a check here }
       end;
 
 
@@ -3190,6 +3249,7 @@ unit cgcpu;
            not shufflescalar(shuffle) then
           internalerror(2009112516);
         list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
+        { VMOV cannot generate an FPU exception, so we do not need a check here }
       end;
 
 
@@ -3205,6 +3265,7 @@ unit cgcpu;
            not shufflescalar(shuffle) then
           internalerror(2009112514);
         list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
+        { VMOV cannot generate an FPU exception, so we do not need a check here }
       end;
 
 
@@ -3218,20 +3279,30 @@ unit cgcpu;
           case op of
             OP_XOR:
               begin
-                if (src<>dst) or
-                   (reg_cgsize(src)<>size) or
-                   assigned(shuffle) then
-                  internalerror(2009112907);
-                tmpreg:=getintregister(list,OS_32);
-                a_load_const_reg(list,OS_32,0,tmpreg);
-                case size of
-                  OS_F32:
-                    list.concat(taicpu.op_reg_reg(A_VMOV,dst,tmpreg));
-                  OS_F64:
-                    list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,tmpreg,tmpreg));
-                  else
-                    internalerror(2009112908);
-                end;
+                if (FPUARM_HAS_NEON in fpu_capabilities[current_settings.fputype]) and (size in [OS_F64]) then
+                  begin
+                    if (reg_cgsize(src)<>size) or
+                       assigned(shuffle) then
+                      internalerror(2019081301);
+                    list.concat(taicpu.op_reg_reg_reg(A_VEOR,dst,dst,src));
+                  end
+                else
+                  begin
+                    if (src<>dst) or
+                       (reg_cgsize(src)<>size) or
+                       assigned(shuffle) then
+                      internalerror(2009112907);
+                    tmpreg:=getintregister(list,OS_32);
+                    a_load_const_reg(list,OS_32,0,tmpreg);
+                    case size of
+                      OS_F32:
+                        list.concat(taicpu.op_reg_reg(A_VMOV,dst,tmpreg));
+                      OS_F64:
+                        list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,tmpreg,tmpreg));
+                      else
+                        internalerror(2009112908);
+                    end;
+                  end;
               end
             else
               internalerror(2009112906);
@@ -3273,6 +3344,18 @@ unit cgcpu;
       end;
 
 
+    procedure tbasecgarm.g_maybe_tls_init(list : TAsmList);
+      begin
+        if pi_needs_tls in current_procinfo.flags then
+          begin
+            list.concat(tai_regalloc.alloc(NR_R0,nil));
+            a_call_name(list,'fpc_read_tp',false);
+            a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,current_procinfo.tlsoffset);
+            list.concat(tai_regalloc.dealloc(NR_R0,nil));
+          end;
+      end;
+
+
     procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
       begin
         case op of
@@ -3323,6 +3406,7 @@ unit cgcpu;
         if (mmsize<>OS_F64) then
           internalerror(2009112405);
         list.concat(taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi));
+        { VMOV cannot generate an FPU exception, so we do not need a check here }
       end;
 
 
@@ -3333,6 +3417,7 @@ unit cgcpu;
         if (mmsize<>OS_F64) then
           internalerror(2009112406);
         list.concat(taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg));
+        { VMOV cannot generate an FPU exception, so we do not need a check here }
       end;
 
 
@@ -3346,6 +3431,8 @@ unit cgcpu;
           OP_NEG,
           OP_NOT :
             internalerror(2012022501);
+          else
+            ;
         end;
         if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
           begin
@@ -3410,6 +3497,8 @@ unit cgcpu;
                     ovloc.resflags:=F_CS;
                   OP_SUB:
                     ovloc.resflags:=F_CC;
+                  else
+                    internalerror(2019050918);
                 end;
               end;
           end
@@ -3483,6 +3572,8 @@ unit cgcpu;
           OP_NEG,
           OP_NOT :
             internalerror(2012022502);
+          else
+            ;
         end;
         if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
           begin
@@ -3511,6 +3602,8 @@ unit cgcpu;
                     ovloc.resflags:=F_CS;
                   OP_SUB:
                     ovloc.resflags:=F_CC;
+                  else
+                    internalerror(2019050917);
                 end;
               end;
           end
@@ -3794,7 +3887,7 @@ unit cgcpu;
                      begin
                        tmpreg2:=getintregister(list,OS_INT);
                        a_loadaddr_ref_reg(list,ref,tmpreg2);
-                       reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment,ref.volatility);
+                       reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                      end
                    else
                      usedtmpref:=ref;
@@ -3827,7 +3920,7 @@ unit cgcpu;
                      begin
                        tmpreg2:=getintregister(list,OS_INT);
                        a_loadaddr_ref_reg(list,ref,tmpreg2);
-                       reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment,ref.volatility);
+                       reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                      end
                    else
                      usedtmpref:=ref;
@@ -3941,7 +4034,7 @@ unit cgcpu;
                   begin
                     { offset in the wrapper needs to be adjusted for the stored
                       return address }
-                    reference_reset_base(href,reference.index,reference.offset+sizeof(aint),sizeof(pint),[]);
+                    reference_reset_base(href,reference.index,reference.offset+sizeof(aint),ctempposinvalid,sizeof(pint),[]);
                     if is_thumb_imm(ioffset) then
                       a_op_const_ref(list,OP_SUB,size,ioffset,href)
                     else
@@ -4008,7 +4101,7 @@ unit cgcpu;
             tmpreg:=getintregister(list,OS_ADDR);
             a_loadaddr_ref_reg(list,ref,tmpreg);
 
-            reference_reset_base(href,tmpreg,0,ref.alignment,ref.volatility);
+            reference_reset_base(href,tmpreg,0,ref.temppos,ref.alignment,ref.volatility);
           end
         else if (op=A_LDR) and
            (oppostfix in [PF_None]) and
@@ -4018,7 +4111,7 @@ unit cgcpu;
             tmpreg:=getintregister(list,OS_ADDR);
             a_loadaddr_ref_reg(list,ref,tmpreg);
 
-            reference_reset_base(href,tmpreg,0,ref.alignment,ref.volatility);
+            reference_reset_base(href,tmpreg,0,ref.temppos,ref.alignment,ref.volatility);
           end
         else if (op=A_LDR) and
            ((oppostfix in [PF_SH,PF_SB]) or
@@ -4027,7 +4120,7 @@ unit cgcpu;
             tmpreg:=getintregister(list,OS_ADDR);
             a_loadaddr_ref_reg(list,ref,tmpreg);
 
-            reference_reset_base(href,tmpreg,0,ref.alignment,ref.volatility);
+            reference_reset_base(href,tmpreg,0,ref.temppos,ref.alignment,ref.volatility);
           end;
 
         Result:=inherited handle_load_store(list, op, oppostfix, reg, href);
@@ -4086,6 +4179,8 @@ unit cgcpu;
                 op:=OP_ADD;
                 a:=aint(dword(-a));
               end
+            else
+              ;
           end;
 
         if is_thumb_imm(a) and (op in [OP_ADD,OP_SUB]) then
@@ -4105,6 +4200,8 @@ unit cgcpu;
                   OP_SUB:
                     //!!! ovloc.resflags:=F_CC;
                     ;
+                  else
+                    ;
                 end;
               end;
           end
@@ -4231,13 +4328,13 @@ unit cgcpu;
         rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
             [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);
 
-        if current_settings.fputype in [fpu_vfpv3,fpu_vfpv4] then
+        if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
           rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
               [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
                RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
                RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
               ],first_mm_imreg,[])
-        else if current_settings.fputype in [fpu_fpv4_s16,fpu_vfpv3_d16] then
+        else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
           rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
               [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
                RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
@@ -4345,7 +4442,7 @@ unit cgcpu;
                      begin
                        tmpreg2:=getintregister(list,OS_INT);
                        a_loadaddr_ref_reg(list,ref,tmpreg2);
-                       reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment,ref.volatility);
+                       reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                      end
                    else
                      usedtmpref:=ref;
@@ -4377,7 +4474,7 @@ unit cgcpu;
                      begin
                        tmpreg2:=getintregister(list,OS_INT);
                        a_loadaddr_ref_reg(list,ref,tmpreg2);
-                       reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment,ref.volatility);
+                       reference_reset_base(usedtmpref,tmpreg2,0,ref.temppos,ref.alignment,ref.volatility);
                      end
                    else
                      usedtmpref:=ref;
@@ -4434,6 +4531,11 @@ unit cgcpu;
               OS_S8: list.concat(taicpu.op_reg_reg(A_SXTB,dst,dst));
               OS_16: list.concat(taicpu.op_reg_reg(A_UXTH,dst,dst));
               OS_S16: list.concat(taicpu.op_reg_reg(A_SXTH,dst,dst));
+              OS_32,
+              OS_S32:
+                ;
+              else
+                internalerror(2019050916);
             end;
           end
         else
@@ -4449,7 +4551,7 @@ unit cgcpu;
         l1 : longint;
       begin
         ovloc.loc:=LOC_VOID;
-        if {$ifopt R+}(a<>-2147483648) and{$endif} is_shifter_const(-a,shift) then
+        if (a<>-2147483648) and is_shifter_const(-a,shift) then
           case op of
             OP_ADD:
               begin
@@ -4461,6 +4563,8 @@ unit cgcpu;
                 op:=OP_ADD;
                 a:=aint(dword(-a));
               end
+            else
+              ;
           end;
 
         if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
@@ -4565,6 +4669,8 @@ unit cgcpu;
                       ovloc.resflags:=F_CS;
                     OP_SUB:
                       ovloc.resflags:=F_CC;
+                    else
+                      ;
                   end;
                 end;
           end
@@ -4622,7 +4728,7 @@ unit cgcpu;
               list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
             else if (op = OP_AND) and is_thumb32_imm(not(dword(a))) then
               list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
-            else if (op = OP_AND) and is_continuous_mask(not(a), shift, width) then
+            else if (op = OP_AND) and is_continuous_mask(aword(not(a)), shift, width) then
               begin
                 a_load_reg_reg(list,size,size,src,dst);
                 list.concat(taicpu.op_reg_const_const(A_BFC,dst,shift,width))
@@ -4953,7 +5059,7 @@ unit cgcpu;
               end;
 
             if regs=[] then
-              list.concat(taicpu.op_reg_reg(A_MOV,NR_R15,NR_R14))
+              list.concat(taicpu.op_reg(A_BX,NR_R14))
             else
               begin
                 reference_reset(ref,4,[]);
@@ -4963,7 +5069,7 @@ unit cgcpu;
               end;
           end
         else
-          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
+          list.concat(taicpu.op_reg(A_BX,NR_R14));
       end;
 
 
@@ -5028,7 +5134,21 @@ unit cgcpu;
                 cg.a_label(current_procinfo.aktlocaldata,l);
                 tmpref.symboldata:=current_procinfo.aktlocaldata.last;
 
-                current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
+                if ref.refaddr=addr_gottpoff then
+                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
+                else if ref.refaddr=addr_tlsgd then
+                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset))
+                else if ref.refaddr=addr_tlsdesc then
+                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset))
+                else if ref.refaddr=addr_tpoff then
+                  begin
+                    if assigned(ref.relsymbol) or (ref.offset<>0) then
+                      Internalerror(2019092805);
+
+                    current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
+                  end
+                else
+                  current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
 
                 { load consts entry }
                 tmpref.symbol:=l;
@@ -5139,6 +5259,7 @@ unit cgcpu;
             instr:=setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32);
             list.Concat(instr);
             add_move_instruction(instr);
+            { VMOV cannot generate an FPU exception, so we do not need a check here }
           end
         else if (fromsize=OS_F64) and
           (tosize=OS_F64) then
@@ -5164,6 +5285,7 @@ unit cgcpu;
     procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
       begin
         handle_load_store(list,A_VSTR,PF_None,reg,ref);
+        { VSTR cannot generate an FPU exception, so we do not need a check here }
       end;
 
 
@@ -5181,7 +5303,10 @@ unit cgcpu;
       begin
         if //(shuffle=nil) and
           (fromsize=OS_F32) then
-          list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg))
+          begin
+            list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
+            { VMOV cannot generate an FPU exception, so we do not need a check here }
+          end
         else
           internalerror(2012100814);
       end;

+ 76 - 20
compiler/arm/cpubase.pas

@@ -113,9 +113,6 @@ unit cpubase;
 
       VOLATILE_INTREGISTERS_DARWIN = [RS_R0..RS_R3,RS_R9,RS_R12..RS_R14];
 
-    type
-      totherregisterset = set of tregisterindex;
-
 {*****************************************************************************
                           Instruction post fixes
 *****************************************************************************}
@@ -341,20 +338,6 @@ unit cpubase;
 *****************************************************************************}
 
     const
-      { Registers which must be saved when calling a routine declared as
-        cppdecl, cdecl, stdcall, safecall, palmossyscall. The registers
-        saved should be the ones as defined in the target ABI and / or GCC.
-
-        This value can be deduced from the CALLED_USED_REGISTERS array in the
-        GCC source.
-      }
-      saved_standard_registers : array[0..6] of tsuperregister =
-        (RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,RS_R9,RS_R10);
-
-      { this is only for the generic code which is not used for this architecture }
-      saved_address_registers : array[0..0] of tsuperregister = (RS_INVALID);
-      saved_mm_registers : array[0..0] of tsuperregister = (RS_INVALID);
-
       { Required parameter alignment when calling a routine declared as
         stdcall and cdecl. The alignment value should be the one defined
         by GCC or the target ABI.
@@ -382,6 +365,9 @@ unit cpubase;
     function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function conditions_equal(const c1, c2: TAsmCond): boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal" }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+
     procedure shifterop_reset(var so : tshifterop); {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function is_pc(const r : tregister) : boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
@@ -391,8 +377,11 @@ unit cpubase;
       doesn't handle ROR_C detection }
     function is_thumb32_imm(d : aint) : boolean;
     function split_into_shifter_const(value : aint;var imm1: dword; var imm2: dword):boolean;
-    function is_continuous_mask(d : aint;var lsb, width: byte) : boolean;
+    function is_continuous_mask(d : aword;var lsb, width: byte) : boolean;
     function dwarf_reg(r:tregister):shortint;
+    function dwarf_reg_no_error(r:tregister):shortint;
+    function eh_return_data_regno(nr: longint): longint;
+
 
     function IsIT(op: TAsmOp) : boolean;
     function GetITLevels(op: TAsmOp) : longint;
@@ -401,6 +390,8 @@ unit cpubase;
     function GenerateThumbCode : boolean;
     function GenerateThumb2Code : boolean;
 
+    function IsVFPFloatImmediate(ft : tfloattype;value : bestreal) : boolean;
+
   implementation
 
     uses
@@ -427,8 +418,11 @@ unit cpubase;
           R_MMREGISTER:
             begin
               case s of
+                { records passed in MM registers }
+                OS_32,
                 OS_F32:
                   cgsize2subreg:=R_SUBFS;
+                OS_64,
                 OS_F64:
                   cgsize2subreg:=R_SUBFD;
                 else
@@ -549,6 +543,26 @@ unit cpubase;
       end;
 
 
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal" }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+      begin
+        Result := (c = C_None) or conditions_equal(Subset, c);
+
+        { Please update as necessary. [Kit] }
+        if not Result then
+          case Subset of
+            C_EQ:
+              Result := (c in [C_GE, C_LE]);
+            C_LT:
+              Result := (c in [C_LE]);
+            C_GT:
+              Result := (c in [C_GE]);
+            else
+              Result := False;
+          end;
+      end;
+
+
     function is_shifter_const(d : aint;var imm_shift : byte) : boolean;
       var
          i : longint;
@@ -621,7 +635,7 @@ unit cpubase;
           end;
       end;
     
-    function is_continuous_mask(d : aint;var lsb, width: byte) : boolean;
+    function is_continuous_mask(d : aword;var lsb, width: byte) : boolean;
       var
         msb : byte;
       begin
@@ -630,7 +644,7 @@ unit cpubase;
         
         width:=msb-lsb+1;
         
-        result:=(lsb<>255) and (msb<>255) and ((((1 shl (msb-lsb+1))-1) shl lsb) = d);
+        result:=(lsb<>255) and (msb<>255) and (aword(((1 shl (msb-lsb+1))-1) shl lsb) = d);
       end;
 
 
@@ -666,6 +680,19 @@ unit cpubase;
           internalerror(200603251);
       end;
 
+    function dwarf_reg_no_error(r:tregister):shortint;
+      begin
+        result:=regdwarf_table[findreg_by_number(r)];
+      end;
+
+    function eh_return_data_regno(nr: longint): longint;
+      begin
+        if (nr>=0) and (nr<2) then
+          result:=nr
+        else
+          result:=-1;
+      end;
+
       { Low part of 64bit return value }
     function NR_FUNCTION_RESULT64_LOW_REG: tregister; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     begin
@@ -750,5 +777,34 @@ unit cpubase;
       end;
 
 
+    function IsVFPFloatImmediate(ft : tfloattype;value : bestreal) : boolean;
+      var
+        singlerec : tcompsinglerec;
+        doublerec : tcompdoublerec;
+      begin
+        Result:=false;
+        case ft of
+          s32real:
+            begin
+              singlerec.value:=value;
+              singlerec:=tcompsinglerec(NtoLE(DWord(singlerec)));
+              Result:=(singlerec.bytes[0]=0) and (singlerec.bytes[1]=0) and ((singlerec.bytes[2] and 7)=0)  and
+                (((singlerec.bytes[3] and $7e)=$40) or ((singlerec.bytes[3] and $7e)=$3e));
+            end;
+          s64real:
+            begin
+              doublerec.value:=value;
+              doublerec:=tcompdoublerec(NtoLE(QWord(doublerec)));
+              Result:=(doublerec.bytes[0]=0) and (doublerec.bytes[1]=0) and (doublerec.bytes[2]=0) and
+                      (doublerec.bytes[3]=0) and (doublerec.bytes[4]=0) and (doublerec.bytes[5]=0) and
+                      ((((doublerec.bytes[6] and $c0)=$0) and ((doublerec.bytes[7] and $7f)=$40)) or
+                       (((doublerec.bytes[6] and $c0)=$c0) and ((doublerec.bytes[7] and $7f)=$3f)));
+            end;
+          else
+            ;
+        end;
+      end;
+
+
 end.
 

+ 23 - 5
compiler/arm/cpuelf.pas

@@ -28,7 +28,7 @@ interface
 implementation
 
   uses
-    globtype,cutils,cclasses,
+    globtype,globals,cutils,cclasses,
     verbose, elfbase,
     systems,aasmbase,ogbase,ogelf,assemble;
 
@@ -335,9 +335,24 @@ implementation
           result:=R_ARM_THM_CALL;
         RELOC_GOT32:
           result:=R_ARM_GOT_BREL;
+        RELOC_TPOFF:
+          if current_settings.tlsmodel=tlsm_initial_exec then
+            result:=R_ARM_TLS_IE32
+          else if current_settings.tlsmodel=tlsm_local_exec then
+            result:=R_ARM_TLS_LE32
+          else
+            Internalerror(2019092901);
+        RELOC_TLSGD:
+          result:=R_ARM_TLS_GD32;
+        RELOC_TLSDESC:
+          result:=R_ARM_TLS_GOTDESC;
+        RELOC_TLS_CALL:
+          result:=R_ARM_TLS_CALL;
+        RELOC_ARM_CALL:
+          result:=R_ARM_CALL;
+        RELOC_DTPOFF:
+          result:=R_ARM_TLS_LDO32;
       else
-        result:=0;
-        writeln(objrel.typ);
         InternalError(2012110602);
       end;
     end;
@@ -588,6 +603,8 @@ implementation
               data.Write(zero,4);
               continue;
             end;
+          else
+            ;
         end;
 
         if (objreloc.flags and rf_raw)=0 then
@@ -955,8 +972,9 @@ implementation
          asmbin : '';
          asmcmd : '';
          supported_targets : [system_arm_embedded,system_arm_darwin,
-                              system_arm_linux,system_arm_gba,
-                              system_arm_nds,system_arm_aros];
+                              system_arm_linux,system_arm_netbsd,
+                              system_arm_gba,system_arm_nds,
+                              system_arm_aros];
          flags : [af_outputbinary,af_smartlink_sections,af_supports_dwarf];
          labelprefix : '.L';
          comment : '';

+ 84 - 9
compiler/arm/cpuinfo.pas

@@ -14,6 +14,8 @@
 
 Unit CPUInfo;
 
+{$i fpcdefs.inc}
+
 Interface
 
   uses
@@ -52,6 +54,9 @@ Type
        cpu_armv7r,
        cpu_armv7m,
        cpu_armv7em
+       { when new elements added afterwards,
+         update class procedure tarmnodeutils.InsertObjectInfo; in narmutil.pas
+       }
       );
 
    tinstructionset = (is_thumb,is_arm);
@@ -66,11 +71,35 @@ Type
       fpu_fpa11,
       fpu_vfpv2,
       fpu_vfpv3,
+      fpu_neon_vfpv3,
       fpu_vfpv3_d16,
       fpu_fpv4_s16,
-      fpu_vfpv4
+      fpu_vfpv4,
+      fpu_neon_vfpv4
+      { when new elements added afterwards, update also fpu_vfp_last below and
+        update class procedure tarmnodeutils.InsertObjectInfo; in narmutil.pas }
      );
 
+Const
+   fpu_vfp_first = fpu_vfpv2;
+   fpu_vfp_last  = fpu_neon_vfpv4;
+
+  fputypestrllvm : array[tfputype] of string[14] = ('',
+    '',
+    '',
+    '',
+    '',
+    '',
+    'fpu=vfpv2',
+    'fpu=vfpv3',
+    'fpu=neon-vfpv3',
+    'fpu=vfpv3-d16',
+    'fpu=vfpv4-s16',
+    'fpu=vfpv4',
+    'fpu=neon-vfpv4'
+  );
+
+Type
    tcontrollertype =
      (ct_none,
 
@@ -208,6 +237,12 @@ Type
       ct_stm32f051r4,
       ct_stm32f051r6,
       ct_stm32f051r8,
+      ct_stm32f091cc,
+      ct_stm32f091cb,
+      ct_stm32f091rc,
+      ct_stm32f091rb,
+      ct_stm32f091vc,
+      ct_stm32f091vb,
       ct_stm32f100x4, // LD&MD value line, 4=16,6=32,8=64,b=128
       ct_stm32f100x6,
       ct_stm32f100x8,
@@ -473,6 +508,9 @@ Type
       ct_nrf52832_xxaa,
       ct_nrf52840_xxaa,
 
+      { Raspberry Pi 2 }
+      ct_raspi2,
+
       // generic Thumb2 target
       ct_thumb2bare
      );
@@ -489,8 +527,6 @@ Const
    ControllerSupport = true;
    {# Size of native extended floating point type }
    extended_size = 12;
-   {# Size of a multimedia register               }
-   mmreg_size = 16;
    { target cpu string (used by compiler options) }
    target_cpu_string = 'arm';
 
@@ -534,7 +570,8 @@ Const
      'ARMV7EM'
    );
 
-   fputypestr : array[tfputype] of string[9] = ('',
+   fputypestr : array[tfputype] of string[10] = (
+     'NONE',
      'SOFT',
      'LIBGCC',
      'FPA',
@@ -542,9 +579,11 @@ Const
      'FPA11',
      'VFPV2',
      'VFPV3',
+     'NEON_VFPV3',
      'VFPV3_D16',
      'FPV4_S16',
-     'VFPV4'
+     'VFPV4',
+     'NEON_VFPV4'
    );
 
 
@@ -717,6 +756,12 @@ Const
       (controllertypestr:'STM32F051R4';     controllerunitstr:'STM32F0XX';        cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00004000; srambase:$20000000; sramsize:$00001000),
       (controllertypestr:'STM32F051R6';     controllerunitstr:'STM32F0XX';        cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00008000; srambase:$20000000; sramsize:$00001000),
       (controllertypestr:'STM32F051R8';     controllerunitstr:'STM32F0XX';        cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00010000; srambase:$20000000; sramsize:$00002000),
+      (controllertypestr:'STM32F091CC';     controllerunitstr:'STM32F0XX';        cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00040000; srambase:$20000000; sramsize:$00008000),
+      (controllertypestr:'STM32F091CB';     controllerunitstr:'STM32F0XX';        cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00020000; srambase:$20000000; sramsize:$00008000),
+      (controllertypestr:'STM32F091RC';     controllerunitstr:'STM32F0XX';        cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00040000; srambase:$20000000; sramsize:$00008000),
+      (controllertypestr:'STM32F091RB';     controllerunitstr:'STM32F0XX';        cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00020000; srambase:$20000000; sramsize:$00008000),
+      (controllertypestr:'STM32F091VC';     controllerunitstr:'STM32F0XX';        cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00040000; srambase:$20000000; sramsize:$00008000),
+      (controllertypestr:'STM32F091VB';     controllerunitstr:'STM32F0XX';        cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00020000; srambase:$20000000; sramsize:$00008000),
 
       { STM32F1 series }
       (controllertypestr:'STM32F100X4';     controllerunitstr:'STM32F10X_LD';     cputype:cpu_armv7m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00004000; srambase:$20000000; sramsize:$00001000),
@@ -983,24 +1028,25 @@ Const
       (controllertypestr:'NRF52832_XXAA'; controllerunitstr:'NRF52'; cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$00000000; flashsize:$00080000; srambase:$20000000; sramsize:$00010000),
       (controllertypestr:'NRF52840_XXAA'; controllerunitstr:'NRF52'; cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$00000000; flashsize:$00080000; srambase:$20000000; sramsize:$00010000),
       
+      { Raspberry Pi 2 }
+      (controllertypestr:'RASPI2'; controllerunitstr:'RASPI2'; cputype:cpu_armv7a; fputype:fpu_vfpv4; flashbase:$00000000; flashsize:$00000000; srambase:$00008000; sramsize:$10000000),
+
       { Bare bones }
       (controllertypestr:'THUMB2_BARE';	controllerunitstr:'THUMB2_BARE';	cputype:cpu_armv7m; fputype:fpu_soft; flashbase:$00000000;	flashsize:$00002000;	srambase:$20000000;	sramsize:$00000400)
     );
 
-   vfp_scalar = [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16,fpu_fpv4_s16];
-
    { Supported optimizations, only used for information }
    supported_optimizerswitches = genericlevel1optimizerswitches+
                                  genericlevel2optimizerswitches+
                                  genericlevel3optimizerswitches-
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
-                                 [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,
+                                 [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_loopunroll,cs_opt_tailrecursion,
                                   cs_opt_stackframe,cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath,cs_opt_forcenostackframe];
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
-     [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
+     [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [cs_opt_scheduler{,cs_opt_loopunroll}];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
 
@@ -1022,6 +1068,19 @@ Const
        CPUARM_HAS_UMULL
       );
 
+   tfpuflags =
+      (
+        FPUARM_HAS_FPA,                { fpu is an fpa based FPU                                                               }
+        FPUARM_HAS_VFP_EXTENSION,      { fpu is a vfp extension                                                                }
+        FPUARM_HAS_VFP_DOUBLE,         { vfp has double support                                                                }
+        FPUARM_HAS_VFP_SINGLE_ONLY,    { vfp has only single support, disjunct to FPUARM_HAS_VFP_DOUBLE, for error checking    }
+        FPUARM_HAS_32REGS,             { vfp has 32 regs, without this flag, 16 are assumed                                    }
+        FPUARM_HAS_VMOV_CONST,         { vmov supports (some) real constants                                                   }
+        FPUARM_HAS_EXCEPTION_TRAPPING, { vfp does exceptions trapping                                                          }
+        FPUARM_HAS_NEON,               { fpu has neon extensions                                                               }
+        FPUARM_HAS_FMA                 { fpu has fused multiply/add instructions                                               }
+      );
+
  const
    cpu_capabilities : array[tcputype] of set of tcpuflags =
      ( { cpu_none     } [],
@@ -1045,6 +1104,22 @@ Const
        { cpu_armv7em  } [CPUARM_HAS_ALL_MEM,CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_THUMB_IDIV,CPUARM_HAS_DMB,CPUARM_HAS_THUMB2,CPUARM_HAS_UMULL]
      );
 
+     fpu_capabilities : array[tfputype] of set of tfpuflags =
+       ( { fpu_none       } [],
+         { fpu_soft       } [],
+         { fpu_libgcc     } [],
+         { fpu_fpa        } [FPUARM_HAS_FPA],
+         { fpu_fpa10      } [FPUARM_HAS_FPA],
+         { fpu_fpa11      } [FPUARM_HAS_FPA],
+         { fpu_vfpv2      } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE],
+         { fpu_vfpv3      } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST],
+         { fpu_neon_vfpv3 } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_NEON],
+         { fpu_vfpv3_d16  } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_VMOV_CONST],
+         { fpu_fpv4_s16   } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_SINGLE_ONLY,FPUARM_HAS_VMOV_CONST],
+         { fpu_vfpv4      } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_FMA],
+         { fpu_neon_vfpv4 } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_NEON,FPUARM_HAS_FMA]
+       );
+
    { contains all CPU supporting any kind of thumb instruction set }
    cpu_has_thumb = [cpu_armv4t,cpu_armv5t,cpu_armv5te,cpu_armv5tej,cpu_armv6t2,cpu_armv6z,cpu_armv6m,cpu_armv7a,cpu_armv7r,cpu_armv7m,cpu_armv7em];
 

+ 9 - 3
compiler/arm/cpunode.pas

@@ -30,21 +30,27 @@ unit cpunode;
     uses
        { generic nodes }
        ncgbas,ncgld,ncgflw,ncgcnv,ncgmem,ncgcon,ncgcal,ncgset,ncginl,ncgopt,ncgmat,ncgobjc,
+       { symtable }
+       symcpu,
+       aasmdef,
        { to be able to only parts of the generic code,
          the processor specific nodes must be included
          after the generic one (FK)
        }
+{$ifndef llvm}
        narmadd,
        narmcal,
        narmmat,
        narminl,
+       narmld,
        narmcnv,
        narmcon,
        narmset,
        narmmem,
-       { symtable }
-       symcpu,
-       aasmdef
+       narmutil
+{$else}
+       llvmnode
+{$endif}
        ;
 
 

+ 214 - 63
compiler/arm/cpupara.pas

@@ -30,32 +30,36 @@ unit cpupara;
        globtype,globals,
        aasmdata,
        cpuinfo,cpubase,cgbase,cgutils,
-       symconst,symtype,symdef,parabase,paramgr;
+       symconst,symtype,symdef,parabase,paramgr,armpara;
 
     type
-       tcpuparamanager = class(tparamanager)
+       tcpuparamanager = class(tarmgenparamanager)
           function get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;override;
+          function get_saved_registers_int(calloption : tproccalloption):tcpuregisterarray;override;
           function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
           function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
-          procedure getintparaloc(list: TAsmList; pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);override;
+          procedure getcgtempparaloc(list: TAsmList; pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
-          function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
+          function create_varargs_paraloc_info(p : tabstractprocdef; side: tcallercallee; varargspara:tvarargsparalist):longint;override;
           function get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
          private
+          function usemmpararegs(calloption: tproccalloption; variadic: boolean): boolean;
+          function getparaloc(calloption : tproccalloption; p : tdef; isvariadic: boolean) : tcgloc;
           procedure init_values(p: tabstractprocdef; side: tcallercallee; var curintreg,
             curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword;
             var sparesinglereg: tregister);
           function create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
             var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister; isvariadic: boolean):longint;
+          procedure paradeftointparaloc(paradef: tdef; paracgsize: tcgsize; out paralocdef: tdef; out paralocsize: tcgsize);
        end;
 
   implementation
 
     uses
        verbose,systems,cutils,
-       defutil,symsym,symcpu,symtable,
+       defutil,symsym,symcpu,symtable,symutil,
        { PowerPC uses procinfo as well in cpupara, so this should not hurt }
        procinfo;
 
@@ -81,7 +85,16 @@ unit cpupara;
       end;
 
 
-    procedure tcpuparamanager.getintparaloc(list: TAsmList; pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
+    function tcpuparamanager.get_saved_registers_int(calloption : tproccalloption):tcpuregisterarray;
+      const
+        saved_regs : {$ifndef VER3_0}tcpuregisterarray{$else}array[0..6] of tsuperregister{$endif} =
+          (RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,RS_R9,RS_R10);
+      begin
+        result:=saved_regs;
+      end;
+
+
+    procedure tcpuparamanager.getcgtempparaloc(list: TAsmList; pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
       var
         paraloc : pcgparalocation;
         psym : tparavarsym;
@@ -120,7 +133,9 @@ unit cpupara;
       end;
 
 
-    function getparaloc(calloption : tproccalloption; p : tdef; isvariadic: boolean) : tcgloc;
+    function tcpuparamanager.getparaloc(calloption : tproccalloption; p : tdef; isvariadic: boolean) : tcgloc;
+      var
+        basedef: tdef;
       begin
          { Later, the LOC_REFERENCE is in most cases changed into LOC_REGISTER
            if push_addr_param for the def is true
@@ -134,7 +149,7 @@ unit cpupara;
                 getparaloc:=LOC_MMREGISTER
               else if (calloption in cdecl_pocalls) or
                  (cs_fp_emulation in current_settings.moduleswitches) or
-                 (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16,fpu_fpv4_s16]) then
+                 (current_settings.fputype in [fpu_vfp_first..fpu_vfp_last]) then
                 { the ARM eabi also allows passing VFP values via VFP registers,
                   but Mac OS X doesn't seem to do that and linux only does it if
                   built with the "-mfloat-abi=hard" option }
@@ -150,7 +165,11 @@ unit cpupara;
             classrefdef:
               getparaloc:=LOC_REGISTER;
             recorddef:
-              getparaloc:=LOC_REGISTER;
+              if usemmpararegs(calloption,isvariadic) and
+                 is_hfa(p,basedef) then
+                getparaloc:=LOC_MMREGISTER
+              else
+                getparaloc:=LOC_REGISTER;
             objectdef:
               getparaloc:=LOC_REGISTER;
             stringdef:
@@ -165,6 +184,9 @@ unit cpupara;
             arraydef:
               if is_dynamic_array(p) then
                 getparaloc:=LOC_REGISTER
+              else if usemmpararegs(calloption,isvariadic) and
+                 is_hfa(p,basedef) then
+                getparaloc:=LOC_MMREGISTER
               else
                 getparaloc:=LOC_REFERENCE;
             setdef:
@@ -210,6 +232,8 @@ unit cpupara;
             result:=not is_smallset(def);
           stringdef :
             result:=tstringdef(def).stringtype in [st_shortstring,st_longstring];
+          else
+            ;
         end;
       end;
 
@@ -218,12 +242,19 @@ unit cpupara;
       var
         i: longint;
         sym: tsym;
+        basedef: tdef;
       begin
         if handle_common_ret_in_param(def,pd,result) then
           exit;
         case def.typ of
           recorddef:
             begin
+              if usemmpararegs(pd.proccalloption,is_c_variadic(pd)) and
+                 is_hfa(def,basedef) then
+                begin
+                  result:=false;
+                  exit;
+                end;
               result:=def.size>4;
               if not result and
                  (target_info.abi in [abi_default,abi_armeb]) then
@@ -263,7 +294,7 @@ unit cpupara;
                   for i:=0 to trecorddef(def).symtable.SymList.count-1 do
                     begin
                       sym:=tsym(trecorddef(def).symtable.SymList[i]);
-                      if sym.typ<>fieldvarsym then
+                      if not is_normal_fieldvarsym(sym) then
                         continue;
                       { bitfield -> ignore }
                       if (trecordsymtable(trecorddef(def).symtable).usefieldalignment=bit_alignment) and
@@ -316,11 +347,13 @@ unit cpupara;
 
       var
         nextintreg,nextfloatreg,nextmmreg : tsuperregister;
-        paradef : tdef;
+        paradef,
+        hfabasedef : tdef;
         paraloc : pcgparalocation;
         stack_offset : aword;
         hp : tparavarsym;
         loc : tcgloc;
+        hfabasesize  : tcgsize;
         paracgsize   : tcgsize;
         paralen : longint;
         i : integer;
@@ -348,6 +381,31 @@ unit cpupara;
         end;
 
 
+      procedure updatemmregs(paradef, basedef: tdef);
+        var
+          regsavailable,
+          regsneeded: longint;
+          basesize: asizeint;
+        begin
+          basesize:=basedef.size;
+          regsneeded:=paradef.size div basesize;
+          regsavailable:=ord(RS_D7)-ord(nextmmreg)+1;
+          case basesize of
+            4:
+              regsavailable:=regsavailable*2+ord(sparesinglereg<>NR_NO);
+            8:
+              ;
+            else
+              internalerror(2019022301);
+          end;
+          if regsavailable<regsneeded then
+            begin
+              nextmmreg:=succ(RS_D7);
+              sparesinglereg:=NR_NO;
+            end;
+        end;
+
+
       begin
         result:=0;
         nextintreg:=curintreg;
@@ -367,6 +425,11 @@ unit cpupara;
             if (p.proccalloption in cstylearrayofconst) and
                is_array_of_const(paradef) then
               begin
+                hp.paraloc[side].def:=paradef;
+                hp.paraloc[side].size:=OS_NO;
+                hp.paraloc[side].alignment:=std_param_align;
+                hp.paraloc[side].intsize:=0;
+
                 paraloc:=hp.paraloc[side].add_location;
                 { hack: the paraloc must be valid, but is not actually used }
                 paraloc^.loc:=LOC_REGISTER;
@@ -413,6 +476,18 @@ unit cpupara;
              hp.paraloc[side].def:=paradef;
              firstparaloc:=true;
 
+             if (loc=LOC_MMREGISTER) and
+                is_hfa(paradef,hfabasedef) then
+               begin
+                 updatemmregs(paradef,hfabasedef);
+                 hfabasesize:=def_cgsize(hfabasedef);
+               end
+             else
+               begin
+                 hfabasedef:=nil;
+                 hfabasesize:=OS_NO;
+               end;
+
 {$ifdef EXTDEBUG}
              if paralen=0 then
                internalerror(200410311);
@@ -420,59 +495,44 @@ unit cpupara;
              while paralen>0 do
                begin
                  paraloc:=hp.paraloc[side].add_location;
-
-                 if (loc=LOC_REGISTER) and (paracgsize in [OS_F32,OS_F64,OS_F80]) then
-                   case paracgsize of
-                     OS_F32:
-                       begin
-                         paraloc^.size:=OS_32;
-                         paraloc^.def:=u32inttype;
-                       end;
-                     OS_F64:
-                       begin
-                         paraloc^.size:=OS_32;
-                         paraloc^.def:=u32inttype;
-                       end;
-                     else
-                       internalerror(2005082901);
-                   end
-                 else if (paracgsize in [OS_NO,OS_64,OS_S64]) then
-                   begin
-                     paraloc^.size:=OS_32;
-                     paraloc^.def:=u32inttype;
-                   end
-                 else
-                   begin
-                     paraloc^.size:=paracgsize;
-                     paraloc^.def:=get_paraloc_def(paradef,paralen,firstparaloc);
-                   end;
                  case loc of
                     LOC_REGISTER:
                       begin
+                        if paracgsize in [OS_F32,OS_F64,OS_F80] then
+                          case paracgsize of
+                            OS_F32,
+                            OS_F64:
+                              begin
+                                paraloc^.size:=OS_32;
+                                paraloc^.def:=u32inttype;
+                              end;
+                            else
+                              internalerror(2005082901);
+                          end;
                         { align registers for eabi }
                         if (target_info.abi in [abi_eabi,abi_eabihf]) and
                            firstparaloc and
                            (paradef.alignment=8) then
                           begin
+                            hp.paraloc[side].Alignment:=8;
                             if (nextintreg in [RS_R1,RS_R3]) then
                               inc(nextintreg)
                             else if nextintreg>RS_R3 then
                               stack_offset:=align(stack_offset,8);
                           end;
-                        { this is not abi compliant
-                          why? (FK) }
                         if nextintreg<=RS_R3 then
                           begin
+                            paradeftointparaloc(paradef,paracgsize,paraloc^.def,paraloc^.size);
                             paraloc^.loc:=LOC_REGISTER;
                             paraloc^.register:=newreg(R_INTREGISTER,nextintreg,R_SUBWHOLE);
                             inc(nextintreg);
                           end
                         else
                           begin
-                            { LOC_REFERENCE always contains everything that's left }
+                            { LOC_REFERENCE always contains everything that's left as a multiple of 4 bytes}
                             paraloc^.loc:=LOC_REFERENCE;
-                            paraloc^.size:=int_cgsize(paralen);
-                            paraloc^.def:=carraydef.getreusable_no_free(u8inttype,paralen);
+                            paraloc^.def:=get_paraloc_def(paradef,paralen,firstparaloc);
+                            paraloc^.size:=def_cgsize(paraloc^.def);
                             if (side=callerside) then
                               paraloc^.reference.index:=NR_STACK_POINTER_REG;
                             paraloc^.reference.offset:=stack_offset;
@@ -482,6 +542,8 @@ unit cpupara;
                       end;
                     LOC_FPUREGISTER:
                       begin
+                        paraloc^.size:=paracgsize;
+                        paraloc^.def:=paradef;
                         if nextfloatreg<=RS_F3 then
                           begin
                             paraloc^.loc:=LOC_FPUREGISTER;
@@ -509,8 +571,18 @@ unit cpupara;
                       end;
                     LOC_MMREGISTER:
                       begin
+                        if assigned(hfabasedef) then
+                          begin
+                            paraloc^.def:=hfabasedef;
+                            paraloc^.size:=hfabasesize;
+                          end
+                        else
+                          begin
+                            paraloc^.size:=paracgsize;
+                            paraloc^.def:=paradef;
+                          end;
                         if (nextmmreg<=RS_D7) or
-                           ((paraloc^.size = OS_F32) and
+                           ((paraloc^.size=OS_F32) and
                             (sparesinglereg<>NR_NO)) then
                           begin
                             paraloc^.loc:=LOC_MMREGISTER;
@@ -546,7 +618,6 @@ unit cpupara;
                             { LOC_REFERENCE always contains everything that's left }
                             paraloc^.loc:=LOC_REFERENCE;
                             paraloc^.size:=int_cgsize(paralen);
-                            paraloc^.def:=carraydef.getreusable_no_free(u8inttype,paralen);
                             if (side=callerside) then
                               paraloc^.reference.index:=NR_STACK_POINTER_REG;
                             paraloc^.reference.offset:=stack_offset;
@@ -556,6 +627,8 @@ unit cpupara;
                       end;
                     LOC_REFERENCE:
                       begin
+                        paraloc^.size:=paracgsize;
+                        paraloc^.def:=paradef;
                         if push_addr_param(hp.varspez,paradef,p.proccalloption) then
                           begin
                             paraloc^.size:=OS_ADDR;
@@ -568,10 +641,11 @@ unit cpupara;
                             if (target_info.abi in [abi_eabi,abi_eabihf]) and
                                firstparaloc and
                                (paradef.alignment=8) then
-                              stack_offset:=align(stack_offset,8);
+                              begin
+                                stack_offset:=align(stack_offset,8);
+                                hp.paraloc[side].Alignment:=8;
+                              end;
 
-                             paraloc^.size:=paracgsize;
-                             paraloc^.def:=paradef;
                              paraloc^.loc:=LOC_REFERENCE;
                              paraloc^.reference.index:=NR_STACK_POINTER_REG;
                              paraloc^.reference.offset:=stack_offset;
@@ -614,41 +688,101 @@ unit cpupara;
       end;
 
 
+    procedure tcpuparamanager.paradeftointparaloc(paradef: tdef; paracgsize: tcgsize; out paralocdef: tdef; out paralocsize: tcgsize);
+      begin
+        if not(paracgsize in [OS_32,OS_S32]) or
+           (paradef.typ in [arraydef,recorddef]) or
+           is_object(paradef) then
+          begin
+            paralocsize:=OS_32;
+            paralocdef:=u32inttype;
+          end
+        else
+          begin
+            paralocsize:=paracgsize;
+            paralocdef:=paradef;
+          end;
+      end;
+
+
     function  tcpuparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;
       var
-        paraloc : pcgparalocation;
+        paraloc: pcgparalocation;
         retcgsize  : tcgsize;
+        basedef: tdef;
+        i: longint;
+        sparesinglereg: tregister;
+        mmreg : TSuperRegister;
       begin
          if set_common_funcretloc_info(p,forcetempdef,retcgsize,result) then
            exit;
 
         paraloc:=result.add_location;
         { Return in FPU register? }
-        if result.def.typ=floatdef then
+        basedef:=nil;
+        sparesinglereg:=NR_NO;
+        if (result.def.typ=floatdef) or
+           is_hfa(result.def,basedef) then
           begin
-            if (target_info.abi=abi_eabihf) or (p.proccalloption=pocall_hardfloat) then
+            if usemmpararegs(p.proccalloption,is_c_variadic(p)) then
               begin
-                paraloc^.loc:=LOC_MMREGISTER;
+                if assigned(basedef) then
+                  begin
+                    for i:=2 to result.def.size div basedef.size do
+                      result.add_location;
+                    retcgsize:=def_cgsize(basedef);
+                  end
+                else
+                  basedef:=result.def;
                 case retcgsize of
                   OS_64,
                   OS_F64:
                     begin
-                      paraloc^.register:=NR_MM_RESULT_REG;
+                      mmreg:=RS_D0;
                     end;
                   OS_32,
                   OS_F32:
                     begin
-                      paraloc^.register:=NR_S0;
+                      mmreg:=RS_S0;
                     end;
                   else
                     internalerror(2012032501);
                 end;
-                paraloc^.size:=retcgsize;
-                paraloc^.def:=result.def;
+                repeat
+                  paraloc^.loc:=LOC_MMREGISTER;
+                  { mm registers are strangly ordered in the arm compiler }
+                  case retcgsize of
+                    OS_32,OS_F32:
+                      begin
+                        if sparesinglereg=NR_NO then
+                          begin
+                            paraloc^.register:=newreg(R_MMREGISTER,mmreg,R_SUBFS);
+                            sparesinglereg:=newreg(R_MMREGISTER,mmreg-RS_S0+RS_S1,R_SUBFS);
+                            inc(mmreg);
+                          end
+                        else
+                          begin
+                            paraloc^.register:=sparesinglereg;
+                            sparesinglereg:=NR_NO;
+                          end;
+                      end;
+                    OS_64,OS_F64:
+                      begin
+                        paraloc^.register:=newreg(R_MMREGISTER,mmreg,R_SUBFD);
+                        inc(mmreg);
+                      end;
+                    else
+                      Internalerror(2019081201);
+                  end;
+
+                  paraloc^.size:=retcgsize;
+                  paraloc^.def:=basedef;
+                  paraloc:=paraloc^.next;
+                until not assigned(paraloc);
               end
             else if (p.proccalloption in [pocall_softfloat]) or
                (cs_fp_emulation in current_settings.moduleswitches) or
-               (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16,fpu_fpv4_s16]) then
+               (current_settings.fputype in [fpu_vfp_first..fpu_vfp_last]) then
               begin
                 case retcgsize of
                   OS_64,
@@ -730,8 +864,7 @@ unit cpupara;
                     end;
                   else
                     begin
-                      paraloc^.size:=retcgsize;
-                      paraloc^.def:=result.def;
+                      paradeftointparaloc(result.def,result.size,paraloc^.def,paraloc^.size);
                     end;
                 end;
               end;
@@ -739,6 +872,14 @@ unit cpupara;
       end;
 
 
+    function tcpuparamanager.usemmpararegs(calloption: tproccalloption; variadic: boolean): boolean;
+      begin
+        result:=
+         ((target_info.abi=abi_eabihf) or (calloption=pocall_hardfloat)) and
+          (not variadic);
+      end;
+
+
     function tcpuparamanager.create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;
       var
         cur_stack_offset: aword;
@@ -753,20 +894,30 @@ unit cpupara;
      end;
 
 
-    function tcpuparamanager.create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;
+    function tcpuparamanager.create_varargs_paraloc_info(p : tabstractprocdef; side: tcallercallee; varargspara:tvarargsparalist):longint;
       var
         cur_stack_offset: aword;
         curintreg, curfloatreg, curmmreg: tsuperregister;
         sparesinglereg:tregister;
       begin
-        init_values(p,callerside,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg);
+        init_values(p,side,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg);
 
-        result:=create_paraloc_info_intern(p,callerside,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,true);
+        result:=create_paraloc_info_intern(p,side,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,true);
         if (p.proccalloption in cstylearrayofconst) then
-          { just continue loading the parameters in the registers }
-          result:=create_paraloc_info_intern(p,callerside,varargspara,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,true)
+          begin
+            { just continue loading the parameters in the registers }
+            if assigned(varargspara) then
+              begin
+                if side=callerside then
+                  result:=create_paraloc_info_intern(p,side,varargspara,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,true)
+                else
+                  internalerror(2019021915);
+              end;
+          end
         else
           internalerror(200410231);
+
+        create_funcretloc_info(p,side);
       end;
 
 begin

+ 16 - 7
compiler/arm/cpupi.pas

@@ -49,13 +49,15 @@ unit cpupi;
           procedure generate_parameter_info;override;
           procedure allocate_got_register(list : TAsmList);override;
           procedure postprocess_code;override;
+
+          procedure allocate_tls_register(list : TAsmList);override;
        end;
 
 
   implementation
 
     uses
-       globals,systems,
+       globals,systems,verbose,
        cpubase,
        tgobj,
        symconst,symtype,symsym,symcpu,paramgr,
@@ -154,6 +156,10 @@ unit cpupi;
             maxpushedparasize:=align(maxpushedparasize,max(current_settings.alignment.localalignmin,4));
             floatsavesize:=0;
             case current_settings.fputype of
+              fpu_none,
+              fpu_soft,
+              fpu_libgcc:
+                ;
               fpu_fpa,
               fpu_fpa10,
               fpu_fpa11:
@@ -172,18 +178,15 @@ unit cpupi;
                   if firstfloatreg<>RS_NO then
                     floatsavesize:=(lastfloatreg-firstfloatreg+1)*12;
                 end;
-              fpu_vfpv2,
-              fpu_vfpv3,
-              fpu_vfpv4,
-              fpu_vfpv3_d16:
+              else if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
                 begin
                   floatsavesize:=0;
                   regs:=cg.rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall);
                   for r:=RS_D0 to RS_D31 do
                     if r in regs then
                       inc(floatsavesize,8);
-                end;
-              fpu_fpv4_s16:
+                end
+              else
                 begin
                   floatsavesize:=0;
                   regs:=cg.rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall);
@@ -276,6 +279,12 @@ unit cpupi;
         finalizearmcode(aktproccode,aktlocaldata);
       end;
 
+
+    procedure tcpuprocinfo.allocate_tls_register(list: TAsmList);
+      begin
+        current_procinfo.tlsoffset:=cg.getaddressregister(list);
+      end;
+
 begin
    cprocinfo:=tcpuprocinfo;
 end.

+ 6 - 7
compiler/arm/hlcgcpu.pas

@@ -46,8 +46,6 @@ interface
       procedure a_jmp_external_name(list: TAsmList; const externalname: TSymStr); override;
     end;
 
-  procedure create_hlcodegen;
-
 implementation
 
   uses
@@ -66,7 +64,7 @@ implementation
         href : treference;
         l : TAsmLabel;
       begin
-        reference_reset_base(href,voidpointertype,NR_R0,0,sizeof(pint),[]);
+        reference_reset_base(href,voidpointertype,NR_R0,0,ctempposinvalid,sizeof(pint),[]);
         if GenerateThumbCode then
           begin
             if (href.offset in [0..124]) and ((href.offset mod 4)=0) then
@@ -111,7 +109,7 @@ implementation
           Internalerror(200006139);
         if GenerateThumbCode then
           begin
-            reference_reset_base(href,voidpointertype,NR_R0,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),sizeof(pint),[]);
+            reference_reset_base(href,voidpointertype,NR_R0,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),ctempposinvalid,sizeof(pint),[]);
             if (href.offset in [0..124]) and ((href.offset mod 4)=0) then
               begin
                 list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R0]));
@@ -144,7 +142,7 @@ implementation
           end
         else
           begin
-            reference_reset_base(href,voidpointertype,NR_R12,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),sizeof(pint),[]);
+            reference_reset_base(href,voidpointertype,NR_R12,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),ctempposinvalid,sizeof(pint),[]);
             cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_R12);
           end;
         if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
@@ -179,7 +177,7 @@ implementation
       if make_global then
         list.concat(Tai_symbol.Createname_global(labelname,AT_FUNCTION,0,procdef))
       else
-        list.concat(Tai_symbol.Createname(labelname,AT_FUNCTION,0,procdef));
+        list.concat(Tai_symbol.Createname_hidden(labelname,AT_FUNCTION,0,procdef));
 
       { the wrapper might need aktlocaldata for the additional data to
         load the constant }
@@ -257,7 +255,7 @@ implementation
 
 
 
-  procedure create_hlcodegen;
+  procedure create_hlcodegen_cpu;
     begin
       if GenerateThumbCode then
         hlcg:=tthumbhlcgcpu.create
@@ -268,4 +266,5 @@ implementation
 
 begin
   chlcgobj:=tbasehlcgarm;
+  create_hlcodegen:=@create_hlcodegen_cpu;
 end.

+ 24 - 22
compiler/arm/narmadd.pas

@@ -161,7 +161,7 @@ interface
 
     function tarmaddnode.use_fma : boolean;
       begin
-       Result:=current_settings.fputype in [fpu_vfpv4];
+       Result:=FPUARM_HAS_FMA in fpu_capabilities[current_settings.fputype];
       end;
 
 
@@ -205,10 +205,10 @@ interface
                  location.register,left.location.register,right.location.register),
                  cgsize2fpuoppostfix[def_cgsize(resultdef)]));
             end;
-          fpu_vfpv2,
-          fpu_vfpv3,
-          fpu_vfpv4,
-          fpu_vfpv3_d16:
+          fpu_soft:
+            { this case should be handled already by pass1 }
+            internalerror(200308252);
+          else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
             begin
               { force mmreg as location, left right doesn't matter
                 as both will be in a fpureg }
@@ -238,8 +238,9 @@ interface
 
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(op,
                  location.register,left.location.register,right.location.register),pf));
-            end;
-          fpu_fpv4_s16:
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+            end
+          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
             begin
               { force mmreg as location, left right doesn't matter
                 as both will be in a fpureg }
@@ -263,10 +264,8 @@ interface
               end;
 
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(op, location.register,left.location.register,right.location.register), PF_F32));
-            end;
-          fpu_soft:
-            { this case should be handled already by pass1 }
-            internalerror(200308252);
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+            end
           else
             internalerror(200308251);
         end;
@@ -305,10 +304,7 @@ interface
                    left.location.register,right.location.register),
                    cgsize2fpuoppostfix[def_cgsize(resultdef)]));
             end;
-          fpu_vfpv2,
-          fpu_vfpv3,
-          fpu_vfpv4,
-          fpu_vfpv3_d16:
+          else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
             begin
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
@@ -325,11 +321,12 @@ interface
 
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(op,
                 left.location.register,right.location.register), pf));
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
               cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VMRS,NR_APSR_nzcv,NR_FPSCR));
               location.resflags:=GetFpuResFlags;
-            end;
-          fpu_fpv4_s16:
+            end
+          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
             begin
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
@@ -341,10 +338,11 @@ interface
 
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(op,
                 left.location.register,right.location.register),PF_F32));
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
               cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
               current_asmdata.CurrAsmList.Concat(taicpu.op_reg_reg(A_VMRS, NR_APSR_nzcv, NR_FPSCR));
-            end;
-          fpu_soft:
+            end
+          else
             { this case should be handled already by pass1 }
             internalerror(2009112404);
         end;
@@ -517,6 +515,8 @@ interface
                         cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                         nodetype:=oldnodetype;
                      end;
+                   else
+                     ;
                 end;
                 cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                 current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register64.reglo,right.location.register64.reglo));
@@ -586,7 +586,7 @@ interface
         result := nil;
         notnode := false;
 
-        if current_settings.fputype = fpu_fpv4_s16 then
+        if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
           begin
             case tfloatdef(left.resultdef).floattype of
               s32real:
@@ -634,7 +634,7 @@ interface
                   end;
 
                   if nodetype in [ltn,lten,gtn,gten,equaln,unequaln] then
-                    resultdef:=pasbool8type;
+                    resultdef:=pasbool1type;
                   result:=ctypeconvnode.create_internal(ccallnode.createintern(procname,ccallparanode.create(
                       ctypeconvnode.create_internal(right,fdef),
                       ccallparanode.create(
@@ -647,6 +647,8 @@ interface
                   if notnode then
                     result:=cnotnode.create(result);
                 end;
+              else
+                internalerror(2019050933);
             end;
           end
         else
@@ -720,7 +722,7 @@ interface
       begin
         result:=GenerateThumbCode or
           not(CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) or
-          (cs_check_overflow in current_settings.localswitches);
+          needoverflowcheck;
       end;
 
 begin

+ 1 - 1
compiler/arm/narmcal.pas

@@ -83,7 +83,7 @@ implementation
          (target_info.abi<>abi_eabihf) and
          (procdefinition.proccalloption<>pocall_hardfloat) and
          ((cs_fp_emulation in current_settings.moduleswitches) or
-          (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16,fpu_fpv4_s16])) then
+          (current_settings.fputype in [fpu_vfp_first..fpu_vfp_last])) then
         begin
           { keep the fpu values in integer registers for now, the code
             generator will move them to memory or an mmregister when necessary

+ 10 - 14
compiler/arm/narmcnv.pas

@@ -78,7 +78,7 @@ implementation
 {$ifdef cpufpemu}
           (current_settings.fputype=fpu_soft) or
 {$endif cpufpemu}
-          (current_settings.fputype=fpu_fpv4_s16) then
+          (FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype]) then
           result:=inherited first_int_to_real
         else
           begin
@@ -117,11 +117,7 @@ implementation
               fpu_fpa10,
               fpu_fpa11:
                 expectloc:=LOC_FPUREGISTER;
-              fpu_vfpv2,
-              fpu_vfpv3,
-              fpu_vfpv4,
-              fpu_vfpv3_d16,
-              fpu_fpv4_s16:
+              fpu_vfp_first..fpu_vfp_last:
                 expectloc:=LOC_MMREGISTER;
               else
                 internalerror(2009112702);
@@ -131,7 +127,7 @@ implementation
 
     function tarmtypeconvnode.first_real_to_real: tnode;
       begin
-        if (current_settings.fputype=fpu_fpv4_s16) then
+        if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
           begin
             case tfloatdef(left.resultdef).floattype of
               s32real:
@@ -244,10 +240,7 @@ implementation
                   end;
               end;
             end;
-          fpu_vfpv2,
-          fpu_vfpv3,
-          fpu_vfpv4,
-          fpu_vfpv3_d16:
+          else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
             begin
               location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
               signed:=left.location.size=OS_S32;
@@ -261,8 +254,8 @@ implementation
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,
                 location.register,left.location.register),
                 signedprec2vfppf[signed,location.size]));
-            end;
-          fpu_fpv4_s16:
+            end
+          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
             begin
               location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
               signed:=left.location.size=OS_S32;
@@ -277,7 +270,10 @@ implementation
                 current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,location.register,left.location.register), PF_F32S32))
               else
                 current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,location.register,left.location.register), PF_F32U32));
-            end;
+            end
+          else
+            { should be handled in pass 1 }
+            internalerror(2019050934);
         end;
       end;
 

+ 93 - 64
compiler/arm/narmcon.pas

@@ -26,10 +26,11 @@ unit narmcon;
 interface
 
     uses
-      ncgcon,cpubase;
+      node,ncgcon,cpubase;
 
     type
       tarmrealconstnode = class(tcgrealconstnode)
+        function pass_1 : tnode;override;
         procedure pass_generate_code;override;
       end;
 
@@ -39,9 +40,10 @@ interface
       verbose,
       globtype,globals,
       cpuinfo,
-      aasmbase,aasmtai,aasmdata,symdef,
+      aasmbase,aasmtai,aasmdata,aasmcpu,
+      symdef,
       defutil,
-      cgbase,cgutils,
+      cgbase,cgutils,cgobj,
       procinfo,
       ncon;
 
@@ -49,6 +51,17 @@ interface
                            TARMREALCONSTNODE
 *****************************************************************************}
 
+    function tarmrealconstnode.pass_1 : tnode;
+      begin
+        result:=nil;
+        if (FPUARM_HAS_VMOV_CONST in fpu_capabilities[current_settings.fputype]) and
+           IsVFPFloatImmediate(tfloatdef(resultdef).floattype,value_real) then
+           expectloc:=LOC_MMREGISTER
+         else
+           expectloc:=LOC_CREFERENCE;
+      end;
+
+
     procedure tarmrealconstnode.pass_generate_code;
       { I suppose the parser/pass_1 must make sure the generated real  }
       { constants are actually supported by the target processor? (JM) }
@@ -59,75 +72,91 @@ interface
          lastlabel : tasmlabel;
          realait : tairealconsttype;
          hiloswapped : boolean;
+         pf : TOpPostfix;
 
       begin
-        location_reset_ref(location,LOC_CREFERENCE,def_cgsize(resultdef),4,[]);
-        lastlabel:=nil;
-        realait:=floattype2ait[tfloatdef(resultdef).floattype];
-        hiloswapped:=is_double_hilo_swapped;
-        { const already used ? }
-        if not assigned(lab_real) then
+        if (FPUARM_HAS_VMOV_CONST in fpu_capabilities[current_settings.fputype]) and
+          IsVFPFloatImmediate(tfloatdef(resultdef).floattype,value_real) then
           begin
-            current_asmdata.getjumplabel(lastlabel);
-            lab_real:=lastlabel;
-            current_procinfo.aktlocaldata.concat(Tai_label.Create(lastlabel));
-            location.reference.symboldata:=current_procinfo.aktlocaldata.last;
-            case realait of
-              aitrealconst_s32bit :
-                begin
-                  current_procinfo.aktlocaldata.concat(tai_realconst.create_s32real(ts32real(value_real)));
-                  { range checking? }
-                  if floating_point_range_check_error and
-                    (tai_realconst(current_procinfo.aktlocaldata.last).value.s32val=MathInf.Value) then
-                    Message(parser_e_range_check_error);
-                end;
-
-              aitrealconst_s64bit :
-                begin
-                  if hiloswapped then
-                    current_procinfo.aktlocaldata.concat(tai_realconst.create_s64real_hiloswapped(ts64real(value_real)))
-                  else
-                    current_procinfo.aktlocaldata.concat(tai_realconst.create_s64real(ts64real(value_real)));
-
-                  { range checking? }
-                  if floating_point_range_check_error and
-                    (tai_realconst(current_procinfo.aktlocaldata.last).value.s64val=MathInf.Value) then
-                    Message(parser_e_range_check_error);
-               end;
-
-              aitrealconst_s80bit :
-                begin
-                  current_procinfo.aktlocaldata.concat(tai_realconst.create_s80real(value_real,tfloatdef(resultdef).size));
-
-                  { range checking? }
-                  if floating_point_range_check_error and
-                    (tai_realconst(current_procinfo.aktlocaldata.last).value.s80val=MathInf.Value) then
-                    Message(parser_e_range_check_error);
-                end;
+            location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+            location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+            if tfloatdef(resultdef).floattype=s32real then
+              pf:=PF_F32
+            else
+              pf:=PF_F64;
+            current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_realconst(A_VMOV,
+               location.register,value_real),pf));
+          end
+        else
+          begin
+            location_reset_ref(location,LOC_CREFERENCE,def_cgsize(resultdef),4,[]);
+            lastlabel:=nil;
+            realait:=floattype2ait[tfloatdef(resultdef).floattype];
+            hiloswapped:=is_double_hilo_swapped;
+            { const already used ? }
+            if not assigned(lab_real) then
+              begin
+                current_asmdata.getjumplabel(lastlabel);
+                lab_real:=lastlabel;
+                current_procinfo.aktlocaldata.concat(Tai_label.Create(lastlabel));
+                location.reference.symboldata:=current_procinfo.aktlocaldata.last;
+                case realait of
+                  aitrealconst_s32bit :
+                    begin
+                      current_procinfo.aktlocaldata.concat(tai_realconst.create_s32real(ts32real(value_real)));
+                      { range checking? }
+                      if floating_point_range_check_error and
+                        (tai_realconst(current_procinfo.aktlocaldata.last).value.s32val=MathInf.Value) then
+                        Message(parser_e_range_check_error);
+                    end;
+
+                  aitrealconst_s64bit :
+                    begin
+                      if hiloswapped then
+                        current_procinfo.aktlocaldata.concat(tai_realconst.create_s64real_hiloswapped(ts64real(value_real)))
+                      else
+                        current_procinfo.aktlocaldata.concat(tai_realconst.create_s64real(ts64real(value_real)));
+
+                      { range checking? }
+                      if floating_point_range_check_error and
+                        (tai_realconst(current_procinfo.aktlocaldata.last).value.s64val=MathInf.Value) then
+                        Message(parser_e_range_check_error);
+                   end;
+
+                  aitrealconst_s80bit :
+                    begin
+                      current_procinfo.aktlocaldata.concat(tai_realconst.create_s80real(value_real,tfloatdef(resultdef).size));
+
+                      { range checking? }
+                      if floating_point_range_check_error and
+                        (tai_realconst(current_procinfo.aktlocaldata.last).value.s80val=MathInf.Value) then
+                        Message(parser_e_range_check_error);
+                    end;
 {$ifdef cpufloat128}
-              aitrealconst_s128bit :
-                begin
-                  current_procinfo.aktlocaldata.concat(tai_realconst.create_s128real(value_real));
-
-                  { range checking? }
-                  if floating_point_range_check_error and
-                    (tai_realconst(current_procinfo.aktlocaldata.last).value.s128val=MathInf.Value) then
-                    Message(parser_e_range_check_error);
-                end;
+                  aitrealconst_s128bit :
+                    begin
+                      current_procinfo.aktlocaldata.concat(tai_realconst.create_s128real(value_real));
+
+                      { range checking? }
+                      if floating_point_range_check_error and
+                        (tai_realconst(current_procinfo.aktlocaldata.last).value.s128val=MathInf.Value) then
+                        Message(parser_e_range_check_error);
+                    end;
 {$endif cpufloat128}
 
-              { the round is necessary for native compilers where comp isn't a float }
-              aitrealconst_s64comp :
-                if (value_real>9223372036854775807.0) or (value_real<-9223372036854775808.0) then
-                  message(parser_e_range_check_error)
+                  { the round is necessary for native compilers where comp isn't a float }
+                  aitrealconst_s64comp :
+                    if (value_real>9223372036854775807.0) or (value_real<-9223372036854775808.0) then
+                      message(parser_e_range_check_error)
+                    else
+                      current_procinfo.aktlocaldata.concat(tai_realconst.create_s64compreal(round(value_real)));
                 else
-                  current_procinfo.aktlocaldata.concat(tai_realconst.create_s64compreal(round(value_real)));
-            else
-              internalerror(2005092401);
-            end;
+                  internalerror(2005092401);
+                end;
+              end;
+            location.reference.symbol:=lab_real;
+            location.reference.base:=NR_R15;
           end;
-        location.reference.symbol:=lab_real;
-        location.reference.base:=NR_R15;
       end;
 
 begin

+ 51 - 60
compiler/arm/narminl.pas

@@ -86,11 +86,7 @@ implementation
                  location.loc := LOC_FPUREGISTER;
                end;
             end;
-          fpu_vfpv2,
-          fpu_vfpv3,
-          fpu_vfpv4,
-          fpu_vfpv3_d16,
-          fpu_fpv4_s16:
+          fpu_vfp_first..fpu_vfp_last:
             begin
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               location_copy(location,left.location);
@@ -127,18 +123,15 @@ implementation
               fpu_fpa10,
               fpu_fpa11:
                 expectloc:=LOC_FPUREGISTER;
-              fpu_vfpv2,
-              fpu_vfpv3,
-              fpu_vfpv4,
-              fpu_vfpv3_d16:
-                expectloc:=LOC_MMREGISTER;
-              fpu_fpv4_s16:
+              else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
+                expectloc:=LOC_MMREGISTER
+              else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
                 begin
                   if tfloatdef(left.resultdef).floattype=s32real then
                     expectloc:=LOC_MMREGISTER
                   else
                     exit(inherited first_abs_real);
-                end;
+                end
               else
                 internalerror(2009112401);
             end;
@@ -158,18 +151,15 @@ implementation
               fpu_fpa10,
               fpu_fpa11:
                 expectloc:=LOC_FPUREGISTER;
-              fpu_vfpv2,
-              fpu_vfpv3,
-              fpu_vfpv4,
-              fpu_vfpv3_d16:
-                expectloc:=LOC_MMREGISTER;
-              fpu_fpv4_s16:
+              else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
+                expectloc:=LOC_MMREGISTER
+              else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
                 begin
                   if tfloatdef(left.resultdef).floattype=s32real then
                     expectloc:=LOC_MMREGISTER
                   else
                     exit(inherited first_sqr_real);
-                end;
+                end
               else
                 internalerror(2009112402);
             end;
@@ -189,18 +179,15 @@ implementation
               fpu_fpa10,
               fpu_fpa11:
                 expectloc:=LOC_FPUREGISTER;
-              fpu_vfpv2,
-              fpu_vfpv3,
-              fpu_vfpv4,
-              fpu_vfpv3_d16:
-                expectloc:=LOC_MMREGISTER;
-              fpu_fpv4_s16:
+              else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
+                expectloc:=LOC_MMREGISTER
+              else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
                 begin
                   if tfloatdef(left.resultdef).floattype=s32real then
                     expectloc:=LOC_MMREGISTER
                   else
                     exit(inherited first_sqrt_real);
-                end;
+                end
               else
                 internalerror(2009112403);
             end;
@@ -262,28 +249,29 @@ implementation
           fpu_fpa10,
           fpu_fpa11:
             current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_ABS,location.register,left.location.register),get_fpu_postfix(resultdef)));
-          fpu_vfpv2,
-          fpu_vfpv3,
-          fpu_vfpv4,
-          fpu_vfpv3_d16:
+          fpu_soft:
+            begin
+              if singleprec then
+                cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,OS_32,tcgint($7fffffff),location.register)
+              else
+                cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,OS_32,tcgint($7fffffff),location.registerhi);
+            end
+          else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
             begin
               if singleprec then
                 pf:=PF_F32
               else
                 pf:=PF_F64;
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register),pf));
-            end;
-          fpu_fpv4_s16:
-            current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register), PF_F32));
-          fpu_soft:
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+            end
+          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
             begin
-              if singleprec then
-                cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,OS_32,tcgint($7fffffff),location.register)
-              else
-                cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,OS_32,tcgint($7fffffff),location.registerhi);
+              current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register), PF_F32));
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
             end
-        else
-          internalerror(2009111402);
+          else
+            internalerror(2009111402);
         end;
       end;
 
@@ -299,21 +287,22 @@ implementation
           fpu_fpa10,
           fpu_fpa11:
             current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(A_MUF,location.register,left.location.register,left.location.register),get_fpu_postfix(resultdef)));
-          fpu_vfpv2,
-          fpu_vfpv3,
-          fpu_vfpv4,
-          fpu_vfpv3_d16:
+          else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
             begin
               if singleprec then
                 pf:=PF_F32
               else
                 pf:=PF_F64;
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register),pf));
-            end;
-          fpu_fpv4_s16:
-            current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register), PF_F32));
-        else
-          internalerror(2009111403);
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+            end
+          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+            begin
+              current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register), PF_F32));
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+            end
+          else
+            internalerror(2009111403);
         end;
       end;
 
@@ -329,21 +318,22 @@ implementation
           fpu_fpa10,
           fpu_fpa11:
             current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_SQT,location.register,left.location.register),get_fpu_postfix(resultdef)));
-          fpu_vfpv2,
-          fpu_vfpv3,
-          fpu_vfpv4,
-          fpu_vfpv3_d16:
+          else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
             begin
               if singleprec then
                 pf:=PF_F32
               else
                 pf:=PF_F64;
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register),pf));
-            end;
-          fpu_fpv4_s16:
-            current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register), PF_F32));
-        else
-          internalerror(2009111402);
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+            end
+          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+            begin
+              current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register), PF_F32));
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+            end
+          else
+            internalerror(2009111402);
         end;
       end;
 
@@ -397,7 +387,7 @@ implementation
                 begin
                   r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
                   cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
-                  reference_reset_base(ref,r,0,left.location.reference.alignment,location.reference.volatility);
+                  reference_reset_base(ref,r,0,location.reference.temppos,left.location.reference.alignment,location.reference.volatility);
                   { since the address might be nil we can't use ldr for older cpus }
                   current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PLD,ref));
                 end;
@@ -457,7 +447,7 @@ implementation
         negproduct : boolean;
         oppostfix : TOpPostfix;
       begin
-         if current_settings.fputype in [fpu_vfpv4] then
+         if FPUARM_HAS_FMA in fpu_capabilities[current_settings.fputype] then
            begin
              negop3:=false;
              negproduct:=false;
@@ -515,6 +505,7 @@ implementation
                oppostfix:=PF_F32;
              current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(op[negproduct,negop3],
                location.register,paraarray[1].location.register,paraarray[2].location.register),oppostfix));
+             cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
            end
          else
            internalerror(2014032301);

+ 172 - 0
compiler/arm/narmld.pas

@@ -0,0 +1,172 @@
+{
+    Copyright (c) 1998-2018 by Florian Klaempfl
+
+    Generate arm assembler for load nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit narmld;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      globtype,
+      symsym,
+      node,ncgld,pass_1,aasmbase;
+
+    type
+      tarmloadnode = class(tcgloadnode)
+         procedure generate_threadvar_access(gvs : tstaticvarsym); override;
+      end;
+
+
+implementation
+
+    uses
+      globals,verbose,
+      cgbase,cgobj,cgutils,
+      aasmdata,aasmcpu,
+      systems,
+      symcpu,symdef,
+      nld,
+      cpubase,
+      parabase,
+      procinfo;
+
+{*****************************************************************************
+                            TARMLOADNODE
+*****************************************************************************}
+
+    procedure tarmloadnode.generate_threadvar_access(gvs: tstaticvarsym);
+      var
+        href: treference;
+        hregister : tregister;
+        handled: boolean;
+        l : TAsmLabel;
+      begin
+        handled:=false;
+        if tf_section_threadvars in target_info.flags then
+          begin
+            if target_info.system in [system_arm_linux] then
+              begin
+                if not(pi_uses_threadvar in current_procinfo.flags) then
+                  internalerror(2012012101);
+                case current_settings.tlsmodel of
+                  tlsm_global_dynamic:
+                    begin
+{$ifdef use_tls_dialect_gnu}
+                      current_asmdata.getjumplabel(l);
+                      reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_TLS),-8,sizeof(AInt),[]);
+                      href.refaddr:=addr_tlsgd;
+                      href.relsymbol:=l;
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,hregister);
+                      cg.a_label(current_asmdata.CurrAsmList,l);
+                      cg.getcpuregister(current_asmdata.CurrAsmList,NR_R0);
+                      cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,OS_ADDR,hregister,NR_PC,NR_R0);
+                      cg.g_call(current_asmdata.CurrAsmList,'__tls_get_addr');
+                      cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_R0);
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,NR_R0,hregister);
+                      reference_reset(location.reference,location.reference.alignment,location.reference.volatility);
+                      location.reference.base:=hregister;
+{$else use_tls_dialect_gnu}
+                      { On arm, we use the gnu2 tls dialect. It has the advantage that it can be relaxed (optimized) by the linker,
+                        this is not possible with the gnu tls dialect.
+
+                        gnu2 is proposed and documented in
+                          Glauber de Oliveira Costa, Alexandre Oliva: Speeding Up Thread-Local Storage Access in DynamicLibraries in the ARM platform, 2006.
+                          Link: https://www.fsfla.org/~lxoliva/writeups/TLS/paper-lk2006.pdf
+                      }
+                      current_asmdata.getjumplabel(l);
+                      reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_TLS),0,sizeof(AInt),[]);
+                      href.refaddr:=addr_tlsdesc;
+                      href.relsymbol:=l;
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.getcpuregister(current_asmdata.CurrAsmList,NR_R0);
+                      cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,NR_R0);
+                      cg.a_label(current_asmdata.CurrAsmList,l);
+
+                      { we have to go the ugly way so we can set addr_tlscall }
+                      cg.allocallcpuregisters(current_asmdata.CurrAsmList);
+                      cg.a_call_name(current_asmdata.CurrAsmList,gvs.mangledname,false);
+                      with taicpu(current_asmdata.CurrAsmList.Last) do
+                        begin
+                          if opcode<>A_BL then
+                            Internalerror(2019092902);
+                          oper[0]^.ref^.refaddr:=addr_tlscall;
+                        end;
+                      cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
+
+                      cg.getcpuregister(current_asmdata.CurrAsmList,NR_R0);
+                      cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_R0);
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,NR_R0,hregister);
+                      reference_reset(location.reference,location.reference.alignment,location.reference.volatility);
+                      location.reference.base:=current_procinfo.tlsoffset;
+                      include(current_procinfo.flags,pi_needs_tls);
+                      location.reference.index:=hregister;
+{$endif use_tls_dialect_gnu}
+                      handled:=true;
+                    end;
+                  tlsm_initial_exec:
+                    begin
+                      current_asmdata.getjumplabel(l);
+                      reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_TLS),-8,sizeof(AInt),[]);
+                      href.refaddr:=addr_tpoff;
+                      href.relsymbol:=l;
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,hregister);
+                      cg.a_label(current_asmdata.CurrAsmList,l);
+                      reference_reset(href,0,[]);
+                      href.base:=NR_PC;
+                      href.index:=hregister;
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,href,hregister);
+                      location.reference.base:=current_procinfo.tlsoffset;
+                      include(current_procinfo.flags,pi_needs_tls);
+                      location.reference.index:=hregister;
+                      handled:=true;
+                    end;
+                  tlsm_local_exec:
+                    begin
+                      reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_TLS),0,sizeof(AInt),[]);
+                      href.refaddr:=addr_tpoff;
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,hregister);
+                      reference_reset(href,0,[]);
+                      location.reference.base:=current_procinfo.tlsoffset;
+                      include(current_procinfo.flags,pi_needs_tls);
+                      location.reference.index:=hregister;
+                      handled:=true;
+                    end;
+                  else
+                    Internalerror(2019092802);
+                end;
+              end;
+          end;
+
+        if not handled then
+          inherited;
+      end;
+
+
+begin
+   cloadnode:=tarmloadnode;
+end.

+ 40 - 30
compiler/arm/narmmat.pas

@@ -51,7 +51,7 @@ interface
 implementation
 
     uses
-      globtype,
+      globtype,compinnr,
       cutils,verbose,globals,constexp,
       aasmbase,aasmcpu,aasmtai,aasmdata,
       defutil,
@@ -75,7 +75,7 @@ implementation
         if not(cs_check_overflow in current_settings.localswitches) and
            (right.nodetype=ordconstn) and
            (nodetype=divn) and
-           not(is_64bitint(resultdef)) and
+           not(is_64bit(resultdef)) and
            {Only the ARM and thumb2-isa support umull and smull, which are required for arbitary division by const optimization}
            (GenerateArmCode or
             GenerateThumb2Code or
@@ -87,11 +87,11 @@ implementation
           result:=nil
         else if ((GenerateThumbCode or GenerateThumb2Code) and (CPUARM_HAS_THUMB_IDIV in cpu_capabilities[current_settings.cputype])) and
           (nodetype=divn) and
-          not(is_64bitint(resultdef)) then
+          not(is_64bit(resultdef)) then
           result:=nil
         else if ((GenerateThumbCode or GenerateThumb2Code) and (CPUARM_HAS_THUMB_IDIV in cpu_capabilities[current_settings.cputype])) and
           (nodetype=modn) and
-          not(is_64bitint(resultdef)) then
+          not(is_64bit(resultdef)) then
           begin
             if (right.nodetype=ordconstn) and
               ispowerof2(tordconstnode(right).value,power) and
@@ -164,7 +164,7 @@ implementation
                       cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,31,numerator,helper1);
                     if GenerateThumbCode then
                       begin
-                        cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,32-power,helper1);
+                        cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,OS_INT,32-power,helper1);
                         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_ADD,helper2,numerator,helper1));
                       end
                     else
@@ -179,9 +179,12 @@ implementation
                else
                  cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_INT,power,numerator,resultreg)
              end
-           else {Everything else is handled the generic code}
+           else if CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype] then
+             {Everything else is handled the generic code}
              cg.g_div_const_reg_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),
-               tordconstnode(right).value.svalue,numerator,resultreg);
+               tordconstnode(right).value.svalue,numerator,resultreg)
+           else
+             internalerror(2019012601);
          end;
 
 {
@@ -286,8 +289,7 @@ implementation
                 resultreg:=cg.getintregister(current_asmdata.CurrAsmList,size);
               end;
 
-            if (right.nodetype=ordconstn) and
-               (CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
+            if (right.nodetype=ordconstn) then
               begin
                 if nodetype=divn then
                   genOrdConstNodeDiv
@@ -310,6 +312,8 @@ implementation
 *****************************************************************************}
 
     procedure tarmnotnode.second_boolean;
+      var
+        tmpreg : TRegister;
       begin
         { if the location is LOC_JUMP, we do the secondpass after the
           labels are allocated
@@ -328,7 +332,14 @@ implementation
                 begin
                   hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
                   cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
-                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,left.location.register,0));
+                  if is_64bit(resultdef) then
+                    begin
+                      tmpreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_INT);
+                      { OR low and high parts together }
+                      current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ORR,tmpreg,left.location.register64.reglo,left.location.register64.reghi),PF_S));
+                    end
+                  else
+                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,left.location.register,0));
                   location_reset(location,LOC_FLAGS,OS_NO);
                   location.resflags:=F_EQ;
                 end;
@@ -356,7 +367,7 @@ implementation
             exit;
           end;
 
-        if (current_settings.fputype<>fpu_fpv4_s16) or
+        if not(FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype]) or
           (tfloatdef(resultdef).floattype=s32real) then
           exit(inherited pass_1);
 
@@ -407,10 +418,20 @@ implementation
                 location.register,left.location.register,0),
                 cgsize2fpuoppostfix[def_cgsize(resultdef)]));
             end;
-          fpu_vfpv2,
-          fpu_vfpv3,
-          fpu_vfpv4,
-          fpu_vfpv3_d16:
+          fpu_soft:
+            begin
+              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
+              location:=left.location;
+              case location.size of
+                OS_32:
+                  cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_XOR,OS_32,tcgint($80000000),location.register);
+                OS_64:
+                  cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_XOR,OS_32,tcgint($80000000),location.registerhi);
+              else
+                internalerror(2014033101);
+              end;
+            end
+          else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[init_settings.fputype] then
             begin
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               location:=left.location;
@@ -424,8 +445,9 @@ implementation
 
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VNEG,
                 location.register,left.location.register), pf));
-            end;
-          fpu_fpv4_s16:
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+            end
+          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[init_settings.fputype] then
             begin
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               location:=left.location;
@@ -433,19 +455,7 @@ implementation
                 location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VNEG,
                 location.register,left.location.register), PF_F32));
-            end;
-          fpu_soft:
-            begin
-              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
-              location:=left.location;
-              case location.size of
-                OS_32:
-                  cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_XOR,OS_32,tcgint($80000000),location.register);
-                OS_64:
-                  cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_XOR,OS_32,tcgint($80000000),location.registerhi);
-              else
-                internalerror(2014033101);
-              end;
+              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
             end
           else
             internalerror(2009112602);

+ 40 - 36
compiler/arm/narmset.pas

@@ -41,9 +41,9 @@ interface
        end;
 
       tarmcasenode = class(tcgcasenode)
-         procedure optimizevalues(var max_linear_list:aint;var max_dist:aword);override;
+         procedure optimizevalues(var max_linear_list:int64;var max_dist:qword);override;
          function  has_jumptable : boolean;override;
-         procedure genjumptable(hp : pcaselabel;min_,max_ : aint);override;
+         procedure genjumptable(hp : pcaselabel;min_,max_ : int64);override;
          procedure genlinearlist(hp : pcaselabel);override;
          procedure genjmptreeentry(p : pcaselabel;parentvalue : TConstExprInt);override;
       end;
@@ -136,7 +136,7 @@ implementation
                             TARMCASENODE
 *****************************************************************************}
 
-    procedure tarmcasenode.optimizevalues(var max_linear_list:aint;var max_dist:aword);
+    procedure tarmcasenode.optimizevalues(var max_linear_list:int64;var max_dist:qword);
       begin
         inc(max_linear_list,2)
       end;
@@ -148,7 +148,7 @@ implementation
       end;
 
 
-    procedure tarmcasenode.genjumptable(hp : pcaselabel;min_,max_ : aint);
+    procedure tarmcasenode.genjumptable(hp : pcaselabel;min_,max_ : int64);
       var
         last : TConstExprInt;
         tmpreg,
@@ -161,22 +161,30 @@ implementation
 
         procedure genitem(list:TAsmList;t : pcaselabel);
           var
-            i : aint;
+            i : int64;
           begin
             if assigned(t^.less) then
               genitem(list,t^.less);
             { fill possible hole }
-            for i:=last.svalue+1 to t^._low.svalue-1 do
-              if cs_create_pic in current_settings.moduleswitches then
-                list.concat(Tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,elselabel,picoffset))
-              else
-                list.concat(Tai_const.Create_sym(elselabel));
-            for i:=t^._low.svalue to t^._high.svalue do
-              if cs_create_pic in current_settings.moduleswitches then
-                list.concat(Tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,blocklabel(t^.blockid),picoffset))
-              else
-                list.concat(Tai_const.Create_sym(blocklabel(t^.blockid)));
-            last:=t^._high.svalue;
+            i:=last+1;
+            while i<=t^._low-1 do
+              begin
+                if cs_create_pic in current_settings.moduleswitches then
+                  list.concat(Tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,elselabel,picoffset))
+                else
+                  list.concat(Tai_const.Create_sym(elselabel));
+                i:=i+1;
+              end;
+            i:=t^._low;
+            while i<=t^._high do
+              begin
+                if cs_create_pic in current_settings.moduleswitches then
+                  list.concat(Tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,blocklabel(t^.blockid),picoffset))
+                else
+                  list.concat(Tai_const.Create_sym(blocklabel(t^.blockid)));
+                i:=i+1;
+              end;
+            last:=t^._high;
             if assigned(t^.greater) then
               genitem(list,t^.greater);
           end;
@@ -399,7 +407,6 @@ implementation
       procedure tarmcasenode.genjmptreeentry(p : pcaselabel;parentvalue : TConstExprInt);
         var
           lesslabel,greaterlabel : tasmlabel;
-          less,greater : pcaselabel;
           cond_gt: TResFlags;
           cmplow : Boolean;
         begin
@@ -424,25 +431,22 @@ implementation
           { no range label: }
           if p^._low=p^._high then
             begin
-               if greaterlabel=lesslabel then
-                 begin
-                   if p^._low-1<>parentvalue then
-                     hlcg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,OC_NE,p^._low,hregister,lesslabel);
-                 end
-               else
-                 begin
-                   cmplow:=p^._low-1<>parentvalue;
-                   if cmplow then
-                     hlcg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,jmp_lt,p^._low,hregister,lesslabel);
-                   if p^._high+1<>parentvalue then
-                     begin
-                       if cmplow then
-                         hlcg.a_jmp_flags(current_asmdata.CurrAsmList,cond_gt,greaterlabel)
-                       else
-                         hlcg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,jmp_gt,p^._low,hregister,greaterlabel);
-                     end;
-                 end;
-               hlcg.a_jmp_always(current_asmdata.CurrAsmList,blocklabel(p^.blockid));
+              if greaterlabel=lesslabel then
+                hlcg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,OC_NE,p^._low,hregister,lesslabel)
+              else
+                begin
+                  cmplow:=p^._low-1<>parentvalue;
+                  if cmplow then
+                    hlcg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,jmp_lt,p^._low,hregister,lesslabel);
+                  if p^._high+1<>parentvalue then
+                    begin
+                      if cmplow then
+                        hlcg.a_jmp_flags(current_asmdata.CurrAsmList,cond_gt,greaterlabel)
+                      else
+                        hlcg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,jmp_gt,p^._low,hregister,greaterlabel);
+                    end;
+                end;
+              hlcg.a_jmp_always(current_asmdata.CurrAsmList,blocklabel(p^.blockid));
             end
           else
             begin

+ 327 - 0
compiler/arm/narmutil.pas

@@ -0,0 +1,327 @@
+{
+    Copyright (c) 2019 by Florian Klämpfl
+
+    ARM version of some node tree helper routines
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit narmutil;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    cclasses,ngenutil;
+
+  type
+    tarmnodeutils = class(tnodeutils)
+      class procedure InsertObjectInfo; override;
+      class procedure insert_init_final_table(entries: tfplist); override;
+    end;
+
+
+  implementation
+
+    uses
+      verbose,
+      systems,
+      globals,
+      cpuinfo,cpubase,
+      cgbase,cgutils,
+      aasmbase,aasmdata,aasmtai,aasmcpu,
+      symdef;
+
+    const
+      Tag_File = 1;
+      Tag_Section = 2;
+      Tag_Symbol = 3;
+      Tag_CPU_raw_name = 4;
+      Tag_CPU_name = 5;
+      Tag_CPU_arch = 6;
+      Tag_CPU_arch_profile = 7;
+      Tag_ARM_ISA_use = 8;
+      Tag_THUMB_ISA_use = 9;
+      Tag_FP_Arch = 10;
+      Tag_WMMX_arch = 11;
+      Tag_Advanced_SIMD_arch = 12;
+      Tag_PCS_config = 13;
+      Tag_ABI_PCS_R9_use = 14;
+      Tag_ABI_PCS_RW_data = 15;
+      Tag_ABI_PCS_RO_data = 16;
+      Tag_ABI_PCS_GOT_use = 17;
+      Tag_ABI_PCS_wchar_t = 18;
+      Tag_ABI_FP_rounding = 19;
+      Tag_ABI_FP_denormal = 20;
+      Tag_ABI_FP_exceptions = 21;
+      Tag_ABI_FP_user_exceptions = 22;
+      Tag_ABI_FP_number_model = 23;
+      Tag_ABI_align_needed = 24;
+      Tag_ABI_align8_preserved = 25;
+      Tag_ABI_enum_size = 26;
+      Tag_ABI_HardFP_use = 27;
+      Tag_ABI_VFP_args = 28;
+      Tag_ABI_WMMX_args = 29;
+      Tag_ABI_optimization_goals = 30;
+      Tag_ABI_FP_optimization_goals = 31;
+      Tag_compatiblity = 32;
+      Tag_CPU_unaligned_access = 34;
+      Tag_FP_HP_extension = 36;
+      Tag_ABI_FP_16bit_format = 38;
+      Tag_MPextension_use = 42;
+      Tag_DIV_use = 44;
+      Tag_nodefaults = 64;
+      Tag_also_compatible_with = 65;
+      Tag_conformance = 67;
+      Tag_T2EE_use = 66;
+      Tag_Virtualization_use = 68;
+
+    class procedure tarmnodeutils.InsertObjectInfo;
+      begin
+        inherited InsertObjectInfo;
+        { write eabi attributes to object file? }
+        if (target_info.system in [system_arm_linux]) and (target_info.abi in [abi_eabihf,abi_eabi]) then
+          begin
+            case current_settings.cputype of
+              cpu_armv3:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,''));
+                end;
+              cpu_armv4:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,1));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'4'));
+                end;
+              cpu_armv4t:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,2));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'4T'));
+                end;
+              cpu_armv5t:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,3));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'5T'));
+                end;
+              cpu_armv5te:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,4));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'5TE'));
+                end;
+              cpu_armv5tej:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,5));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'5TEJ'));
+                end;
+              cpu_armv6:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,6));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'6'));
+                end;
+              cpu_armv6k:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,9));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'6K'));
+                end;
+              cpu_armv6t2:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,8));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'T2'));
+                end;
+              cpu_armv6z:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,7));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'6Z'));
+                end;
+              cpu_armv6m:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,11));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'6-M'));
+                end;
+              cpu_armv7:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,10));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'7'));
+                end;
+              cpu_armv7a:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,10));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,$41));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'7-A'));
+                end;
+              cpu_armv7r:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,10));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,$52));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'7-R'));
+                end;
+              cpu_armv7m:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,10));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,$4D));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'7-M'));
+                end;
+              cpu_armv7em:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,13));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,$4D));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'7E-M'));
+                end;
+              else
+                Internalerror(2019100602);
+            end;
+            case current_settings.fputype of
+              fpu_soft,
+              fpu_libgcc:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,0));
+              fpu_vfpv2:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,2));
+              fpu_vfpv3,
+              fpu_neon_vfpv3:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,3));
+              fpu_vfpv3_d16:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,4));
+              fpu_fpv4_s16:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,6));
+              fpu_vfpv4,
+              fpu_neon_vfpv4:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,5));
+              else
+                Internalerror(2019100603);
+            end;
+            if FPUARM_HAS_FMA in fpu_capabilities[current_settings.fputype] then
+              current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_Advanced_SIMD_arch,2))
+            else if FPUARM_HAS_NEON in fpu_capabilities[current_settings.fputype] then
+              current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_Advanced_SIMD_arch,1))
+            else
+              current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_Advanced_SIMD_arch,0));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ARM_ISA_use,1));
+            if CPUARM_HAS_THUMB2 in cpu_capabilities[current_settings.cputype] then
+              current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_THUMB_ISA_use,2))
+            else
+              current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_THUMB_ISA_use,1));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_VFP_args,1));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_FP_denormal,1));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_FP_exceptions,1));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_FP_number_model,3));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_align_needed,0));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_align8_preserved,1));
+            { gcc typically writes more like enum size, wchar size, optimization goal, however, this
+              is normally not module global in FPC }
+          end;
+      end;
+
+    class procedure tarmnodeutils.insert_init_final_table(entries:tfplist);
+
+      procedure genentry(list : TAsmList);
+        var
+          ref: treference;
+        begin
+          if GenerateThumbCode then
+            list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]))
+          else
+            begin
+              reference_reset(ref,4,[]);
+              ref.index:=NR_STACK_POINTER_REG;
+              ref.addressmode:=AM_PREINDEXED;
+              list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,[RS_R14]),PF_FD));
+            end;
+        end;
+
+      procedure genexit(list : TAsmList);
+        var
+          ref: treference;
+        begin
+          if GenerateThumbCode then
+            list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R15]))
+          else
+            begin
+              reference_reset(ref,4,[]);
+              ref.index:=NR_STACK_POINTER_REG;
+              ref.addressmode:=AM_PREINDEXED;
+              list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,[RS_R15]),PF_FD));
+            end;
+        end;
+
+      var
+        initList, finalList, header: TAsmList;
+        entry : pinitfinalentry;
+        i : longint;
+      begin
+        if not(tf_init_final_units_by_calls in target_info.flags) then
+          begin
+            inherited insert_init_final_table(entries);
+            exit;
+          end;
+        initList:=TAsmList.create;
+        finalList:=TAsmList.create;
+
+        genentry(finalList);
+        genentry(initList);
+
+        for i:=0 to entries.count-1 do
+          begin
+            entry:=pinitfinalentry(entries[i]);
+            if entry^.finifunc<>'' then
+              finalList.Concat(taicpu.op_sym(A_BL,current_asmdata.RefAsmSymbol(entry^.finifunc,AT_FUNCTION)));
+            if entry^.initfunc<>'' then
+              initList.Concat(taicpu.op_sym(A_BL,current_asmdata.RefAsmSymbol(entry^.initfunc,AT_FUNCTION)));
+          end;
+
+        genexit(finalList);
+        genexit(initList);
+
+        header:=TAsmList.create;
+        new_section(header, sec_code, 'FPC_INIT_FUNC_TABLE', 1);
+        header.concat(tai_symbol.Createname_global('FPC_INIT_FUNC_TABLE',AT_FUNCTION,0,voidcodepointertype));
+
+        initList.insertList(header);
+        header.free;
+
+        current_asmdata.AsmLists[al_procedures].concatList(initList);
+
+        header:=TAsmList.create;
+        new_section(header, sec_code, 'FPC_FINALIZE_FUNC_TABLE', 1);
+        header.concat(tai_symbol.Createname_global('FPC_FINALIZE_FUNC_TABLE',AT_FUNCTION,0,voidcodepointertype));
+
+        finalList.insertList(header);
+        header.free;
+
+        current_asmdata.AsmLists[al_procedures].concatList(finalList);
+
+        initList.Free;
+        finalList.Free;
+
+        inherited insert_init_final_table(entries);
+      end;
+
+  begin
+    cnodeutils:=tarmnodeutils;
+  end.
+

+ 17 - 12
compiler/arm/raarmgas.pas

@@ -65,10 +65,10 @@ Unit raarmgas;
       globtype,globals,verbose,
       systems,aasmbase,aasmtai,aasmdata,aasmcpu,
       { symtable }
-      symconst,symsym,
+      symconst,symsym,symdef,
       procinfo,
       rabase,rautils,
-      cgbase,cgutils;
+      cgbase,cgutils,paramgr;
 
 
     function tarmunifiedattreader.is_unified: boolean;
@@ -147,6 +147,7 @@ Unit raarmgas;
           end;
       end;
 
+
     function tarmattreader.is_targetdirective(const s: string): boolean;
       begin
         case s of
@@ -163,7 +164,7 @@ Unit raarmgas;
     procedure tarmattreader.ReadSym(oper : tarmoperand);
       var
          tempstr, mangledname : string;
-         typesize,l,k : longint;
+         typesize,l,k : tcgint;
       begin
         tempstr:=actasmpattern;
         Consume(AS_ID);
@@ -310,7 +311,7 @@ Unit raarmgas;
       procedure read_index(require_rbracket : boolean);
         var
           recname : string;
-          o_int,s_int : aint;
+          o_int,s_int : tcgint;
         begin
           case actasmtoken of
             AS_REGISTER :
@@ -576,7 +577,7 @@ Unit raarmgas;
     Procedure tarmattreader.BuildOperand(oper : tarmoperand);
       var
         expr : string;
-        typesize,l : longint;
+        typesize,l : tcgint;
 
 
         procedure AddLabelOperand(hl:tasmlabel);
@@ -607,7 +608,7 @@ Unit raarmgas;
             hasdot  : boolean;
             l,
             toffset,
-            tsize   : longint;
+            tsize   : tcgint;
           begin
             if not(actasmtoken in [AS_DOT,AS_PLUS,AS_MINUS]) then
              exit;
@@ -634,10 +635,8 @@ Unit raarmgas;
                   { don't allow direct access to fields of parameters, because that
                     will generate buggy code. Allow it only for explicit typecasting }
                   if hasdot and
-                     (not oper.hastype) and
-                     (tabstractnormalvarsym(oper.opr.localsym).owner.symtabletype=parasymtable) and
-                     (current_procinfo.procdef.proccalloption<>pocall_register) then
-                    Message(asmr_e_cannot_access_field_directly_for_parameters);
+                     (not oper.hastype) then
+                     checklocalsubscript(oper.opr.localsym);
                   inc(oper.opr.localsymofs,l)
                 end;
               OPR_CONSTANT :
@@ -725,6 +724,8 @@ Unit raarmgas;
                         end;
                     end;
                 end;
+              else
+               ;
             end;
           end;
 
@@ -803,7 +804,7 @@ Unit raarmgas;
           var
             symtype: TAsmsymtype;
             sym: string;
-            val: aint;
+            val: tcgint;
           begin
             case actasmtoken of
               AS_INTNUM,
@@ -818,6 +819,8 @@ Unit raarmgas;
                   oper.opr.ref.base:=NR_PC;
                   oper.opr.ref.symbol:=GetConstLabel(sym,val);
                 end;
+              else
+                ;
             end;
           end;
 
@@ -1144,6 +1147,8 @@ Unit raarmgas;
               else
                 Message(asmr_e_invalid_operand_type); // Otherwise it would have been seen as a AS_REGISTER
             end;
+          else
+            Message(asmr_e_invalid_operand_type);
         end;
       end;
 
@@ -1434,7 +1439,7 @@ Unit raarmgas;
       var
         symname,
         symval  : String;
-        val     : aint;
+        val     : tcgint;
         symtyp  : TAsmsymtype;
       begin
         case actasmpattern of

+ 14 - 6
compiler/arm/rgcpu.pas

@@ -166,6 +166,8 @@ unit rgcpu;
                     if current_procinfo.framepointer<>r then
                       add_edge(getsupreg(taicpu(p).oper[1]^.ref^.base),getsupreg(r));
                   end;
+              else
+                ;
             end;
           end;
       end;
@@ -195,7 +197,7 @@ unit rgcpu;
           {$endif}
           cg.a_load_const_reg(helplist,OS_ADDR,spilltemp.offset,hreg);
           cg.a_op_reg_reg(helplist,OP_ADD,OS_ADDR,current_procinfo.framepointer,hreg);
-          reference_reset_base(tmpref,hreg,0,sizeof(aint),[]);
+          reference_reset_base(tmpref,hreg,0,spilltemp.temppos,sizeof(aint),[]);
         end
       else if is_shifter_const(a and not($FFF), immshift) then
         if spilltemp.offset > 0 then
@@ -205,7 +207,7 @@ unit rgcpu;
             {$endif}
             helplist.concat(taicpu.op_reg_reg_const(A_ADD, hreg, current_procinfo.framepointer,
                                                       a and not($FFF)));
-            reference_reset_base(tmpref, hreg, a and $FFF, sizeof(aint),[]);
+            reference_reset_base(tmpref, hreg, a and $FFF, spilltemp.temppos, sizeof(aint),[]);
           end
         else
           begin
@@ -214,7 +216,7 @@ unit rgcpu;
             {$endif}
             helplist.concat(taicpu.op_reg_reg_const(A_SUB, hreg, current_procinfo.framepointer,
                                                       a and not($FFF)));
-            reference_reset_base(tmpref, hreg, -(a and $FFF), sizeof(aint),[]);
+            reference_reset_base(tmpref, hreg, -(a and $FFF), spilltemp.temppos, sizeof(aint),[]);
           end
       else
         begin
@@ -222,7 +224,7 @@ unit rgcpu;
           helplist.concat(tai_comment.create(strpnew('Spilling: Use a_load_const_reg to fix spill offset')));
           {$endif}
           cg.a_load_const_reg(helplist,OS_ADDR,spilltemp.offset,hreg);
-          reference_reset_base(tmpref,current_procinfo.framepointer,0,sizeof(aint),[]);
+          reference_reset_base(tmpref,current_procinfo.framepointer,0,spilltemp.temppos,sizeof(aint),[]);
           tmpref.index:=hreg;
         end;
 
@@ -353,6 +355,8 @@ unit rgcpu;
                 RS_S21,RS_S23,RS_S25,RS_S27,RS_S29,RS_S31] do
                 add_edge(supreg,i);
             end;
+          else
+            ;
         end;
       end;
 
@@ -483,7 +487,7 @@ unit rgcpu;
             tmpref.base:=NR_R15;
             helplist.concat(taicpu.op_reg_ref(A_LDR,hreg,tmpref));
 
-            reference_reset_base(tmpref,current_procinfo.framepointer,0,sizeof(aint),[]);
+            reference_reset_base(tmpref,current_procinfo.framepointer,0,ctempposinvalid,sizeof(aint),[]);
             tmpref.index:=hreg;
 
             if spilltemp.index<>NR_NO then
@@ -543,7 +547,7 @@ unit rgcpu;
             if spilltemp.index<>NR_NO then
               internalerror(200401263);
 
-            reference_reset_base(tmpref,current_procinfo.framepointer,0,sizeof(pint),[]);
+            reference_reset_base(tmpref,current_procinfo.framepointer,0,ctempposinvalid,sizeof(pint),[]);
             tmpref.index:=hreg;
 
             helplist.concat(spilling_create_store(tempreg,tmpref));
@@ -606,6 +610,8 @@ unit rgcpu;
                     if current_procinfo.framepointer<>r then
                       add_edge(getsupreg(taicpu(p).oper[1]^.ref^.base),getsupreg(r));
                   end;
+              else
+                ;
             end;
           end;
       end;
@@ -658,6 +664,8 @@ unit rgcpu;
                        add_edge(getsupreg(taicpu(p).oper[0]^.reg),i);
                      end;
                  end;
+              else
+                ;
             end;
           end;
       end;

+ 2 - 2
compiler/arm/symcpu.pas

@@ -101,7 +101,7 @@ type
     { library symbol for AROS }
     libsym : tsym;
     libsymderef : tderef;
-    function getcopyas(newtyp: tdeftyp; copytyp: tproccopytyp): tstoreddef; override;
+    function getcopyas(newtyp: tdeftyp; copytyp: tproccopytyp; const paraprefix: string): tstoreddef; override;
     procedure buildderef; override;
     procedure deref; override;
   end;
@@ -208,7 +208,7 @@ implementation
     end;
 
 
-  function tcpuprocdef.getcopyas(newtyp: tdeftyp; copytyp: tproccopytyp): tstoreddef;
+  function tcpuprocdef.getcopyas(newtyp: tdeftyp; copytyp: tproccopytyp; const paraprefix: string): tstoreddef;
     begin
       result:=inherited;
       if newtyp=procdef then

+ 121 - 0
compiler/armgen/armpara.pas

@@ -0,0 +1,121 @@
+{
+    Copyright (c) 2019 by Jonas Maebe
+
+    ARM and AArch64 common parameter helpers
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ ****************************************************************************
+}
+unit armpara;
+
+{$i fpcdefs.inc}
+
+interface
+
+uses
+  symtype,
+  paramgr;
+
+type
+  tarmgenparamanager = class(tparamanager)
+   protected
+    { Returns whether a def is a "homogeneous float array" at the machine level.
+      This means that in the memory layout, the def only consists of maximally
+      4 floating point values that appear consecutively in memory }
+    function is_hfa(p: tdef; out basedef: tdef) : boolean;
+   private
+    function is_hfa_internal(p: tdef; var basedef: tdef; var elecount: longint): boolean;
+  end;
+
+
+implementation
+
+  uses
+    symconst,symdef,symsym,symutil,defutil;
+
+
+  function tarmgenparamanager.is_hfa(p: tdef; out basedef: tdef): boolean;
+    var
+      elecount: longint;
+    begin
+      result:=false;
+      basedef:=nil;
+      elecount:=0;
+      result:=is_hfa_internal(p,basedef,elecount);
+      result:=
+        result and
+        (elecount>0) and
+        (elecount<=4) and
+        (p.size=basedef.size*elecount)
+      end;
+
+
+  function tarmgenparamanager.is_hfa_internal(p: tdef; var basedef: tdef; var elecount: longint): boolean;
+    var
+      i: longint;
+      sym: tsym;
+      tmpelecount: longint;
+    begin
+      result:=false;
+      case p.typ of
+        arraydef:
+          begin
+            if is_special_array(p) then
+              exit;
+            { an array of empty records has no influence }
+            if tarraydef(p).elementdef.size=0 then
+              begin
+                result:=true;
+                exit
+              end;
+            tmpelecount:=0;
+            if not is_hfa_internal(tarraydef(p).elementdef,basedef,tmpelecount) then
+              exit;
+            { tmpelecount now contains the number of hfa elements in a
+              single array element (e.g. 2 if it's an array of a record
+              containing two singles) -> multiply by number of elements
+              in the array }
+            inc(elecount,tarraydef(p).elecount*tmpelecount);
+            if elecount>4 then
+              exit;
+            result:=true;
+          end;
+        floatdef:
+          begin
+            if not assigned(basedef) then
+              basedef:=p
+            else if basedef<>p then
+              exit;
+            inc(elecount);
+            result:=true;
+          end;
+        recorddef:
+          begin
+            for i:=0 to tabstractrecorddef(p).symtable.symlist.count-1 do
+              begin
+                sym:=tsym(tabstractrecorddef(p).symtable.symlist[i]);
+                if not is_normal_fieldvarsym(sym) then
+                  continue;
+                if not is_hfa_internal(tfieldvarsym(sym).vardef,basedef,elecount) then
+                  exit
+              end;
+            result:=true;
+          end;
+        else
+          exit
+      end;
+    end;
+
+end.

+ 279 - 18
compiler/assemble.pas

@@ -156,9 +156,12 @@ interface
         function single2str(d : single) : string; virtual;
         function double2str(d : double) : string; virtual;
         function extended2str(e : extended) : string; virtual;
-        Function DoPipe:boolean;
+        Function DoPipe:boolean; virtual;
 
         function CreateNewAsmWriter: TExternalAssemblerOutputFile; virtual;
+
+        {# Return true if the external assembler should run again }
+        function RerunAssembler: boolean; virtual;
       public
 
         {# Returns the complete path and executable name of the assembler
@@ -250,11 +253,19 @@ Implementation
 {$ifdef memdebug}
       cclasses,
 {$endif memdebug}
-      script,fmodule,verbose,
+{$ifdef OMFOBJSUPPORT}
+      omfbase,
+      ogomf,
+{$endif OMFOBJSUPPORT}
+{$if defined(cpuextended) and defined(FPC_HAS_TYPE_EXTENDED)}
+{$else}
+{$ifdef FPC_SOFT_FPUX80}
+      sfpux80,
+{$endif FPC_SOFT_FPUX80}
+{$endif}
+      cscript,fmodule,verbose,
       cpuinfo,
-      aasmcpu,
-      owar,owomflib
-      ;
+      aasmcpu;
 
     var
       CAssembler : array[tasm] of TAssemblerClass;
@@ -410,6 +421,7 @@ Implementation
         s: ansistring;
       begin
         MaybeAddLinePrefix;
+        s:='';
         setlength(s,len);
         move(p^,s[1],len);
         AsmWriteAnsiStringUnfiltered(decorator.LineFilter(s));
@@ -730,9 +742,13 @@ Implementation
 
     Function TExternalAssembler.DoPipe:boolean;
       begin
+{$ifdef hasunix}
         DoPipe:=(cs_asm_pipe in current_settings.globalswitches) and
                 (([cs_asm_extern,cs_asm_leave,cs_link_on_target] * current_settings.globalswitches) = []) and
                 ((asminfo^.id in [as_gas,as_ggas,as_darwin,as_powerpc_xcoff,as_clang,as_solaris_as]));
+{$else hasunix}
+        DoPipe:=false;
+{$endif}
       end;
 
 
@@ -869,7 +885,7 @@ Implementation
 
     Function TExternalAssembler.DoAssemble:boolean;
       begin
-        DoAssemble:=true;
+        result:=true;
         if DoPipe then
          exit;
         if not(cs_asm_extern in current_settings.globalswitches) then
@@ -883,13 +899,13 @@ Implementation
            Message1(exec_i_assembling,name);
          end;
 
-        if CallAssembler(FindAssembler,MakeCmdLine) then
-         writer.RemoveAsm
+        repeat
+          result:=CallAssembler(FindAssembler,MakeCmdLine)
+        until not(result) or not RerunAssembler;
+        if result then
+          writer.RemoveAsm
         else
-         begin
-            DoAssemble:=false;
-            GenerateError;
-         end;
+          GenerateError;
       end;
 
 
@@ -967,6 +983,12 @@ Implementation
       end;
 
 
+    function TExternalAssembler.RerunAssembler: boolean;
+      begin
+        result:=false;
+      end;
+
+
     procedure TExternalAssembler.WriteSourceLine(hp: tailineinfo);
       var
         module : tmodule;
@@ -1047,6 +1069,10 @@ Implementation
         ccomp: comp;
 {$if defined(cpuextended) and defined(FPC_HAS_TYPE_EXTENDED)}
         eextended: extended;
+{$else}
+{$ifdef FPC_SOFT_FPUX80}
+	eextended: floatx80;
+{$endif}
 {$endif cpuextended}
       begin
         if do_line then
@@ -1060,6 +1086,20 @@ Implementation
               { can't write full 80 bit floating point constants yet on non-x86 }
               aitrealconst_s80bit:
                 writer.AsmWriteLn(asminfo^.comment+'value: '+extended2str(tai_realconst(hp).value.s80val));
+{$else}
+{$ifdef FPC_SOFT_FPUX80}
+{$push}{$warn 6018 off} { Unreachable code due to compile time evaluation }
+             aitrealconst_s80bit:
+               begin
+     	         if sizeof(tai_realconst(hp).value.s80val) = sizeof(double) then
+                   writer.AsmWriteLn(asminfo^.comment+'value: '+double2str(tai_realconst(hp).value.s80val))
+     	         else if sizeof(tai_realconst(hp).value.s80val) = sizeof(single) then
+                   writer.AsmWriteLn(asminfo^.comment+'value: '+single2str(tai_realconst(hp).value.s80val))
+                else
+     	         internalerror(2017091901);
+       	      end;
+{$pop}
+{$endif}
 {$endif cpuextended}
               aitrealconst_s64comp:
                 writer.AsmWriteLn(asminfo^.comment+'value: '+extended2str(tai_realconst(hp).value.s64compval));
@@ -1089,6 +1129,21 @@ Implementation
               eextended:=extended(tai_realconst(hp).value.s80val);
               pdata:=@eextended;
             end;
+{$else}
+{$ifdef FPC_SOFT_FPUX80}
+{$push}{$warn 6018 off} { Unreachable code due to compile time evaluation }
+          aitrealconst_s80bit:
+            begin
+	      if sizeof(tai_realconst(hp).value.s80val) = sizeof(double) then
+                eextended:=float64_to_floatx80(float64(double(tai_realconst(hp).value.s80val)))
+	      else if sizeof(tai_realconst(hp).value.s80val) = sizeof(single) then
+	        eextended:=float32_to_floatx80(float32(single(tai_realconst(hp).value.s80val)))
+	      else
+	        internalerror(2017091901);
+              pdata:=@eextended;
+            end;
+{$pop}
+{$endif}
 {$endif cpuextended}
           aitrealconst_s64comp:
             begin
@@ -1503,6 +1558,7 @@ Implementation
         objsym,
         objsymend : TObjSymbol;
         cpu: tcputype;
+        eabi_section, TmpSection: TObjSection;
       begin
         while assigned(hp) do
          begin
@@ -1560,9 +1616,11 @@ Implementation
                                     (objsym.objsection<>ObjData.CurrObjSec) then
                                    InternalError(200404124);
                                end
+{$push} {$R-}{$Q-}
                              else
                                Tai_const(hp).value:=objsymend.address-objsym.address+Tai_const(hp).symofs;
                            end;
+{$pop}
                        end;
                    end;
                  ObjData.alloc(tai_const(hp).size);
@@ -1593,6 +1651,11 @@ Implementation
                              break;
                            end;
                      end;
+{$ifdef OMFOBJSUPPORT}
+                   asd_omf_linnum_line:
+                     { ignore for now, but should be added}
+                     ;
+{$endif OMFOBJSUPPORT}
 {$ifdef ARM}
                    asd_thumb_func:
                      ObjData.ThumbFunc:=true;
@@ -1600,13 +1663,20 @@ Implementation
                      { ai_directive(hp).name can be only 16 or 32, this is checked by the reader }
                      ObjData.ThumbFunc:=tai_directive(hp).name='16';
 {$endif ARM}
+{$ifdef RISCV}
+                   asd_option:
+                     internalerror(2019031701);
+{$endif RISCV}
                    else
                      internalerror(2010011101);
                  end;
                end;
              ait_section:
                begin
-                 ObjData.CreateSection(Tai_section(hp).sectype,Tai_section(hp).name^,Tai_section(hp).secorder);
+                 if Tai_section(hp).sectype=sec_user then
+                   ObjData.CreateSection(Tai_section(hp).sectype,Tai_section(hp).secflags,Tai_section(hp).secprogbits,Tai_section(hp).name^,Tai_section(hp).secorder)
+                 else
+                   ObjData.CreateSection(Tai_section(hp).sectype,Tai_section(hp).name^,Tai_section(hp).secorder);
                  Tai_section(hp).sec:=ObjData.CurrObjSec;
                end;
              ait_symbol :
@@ -1630,6 +1700,30 @@ Implementation
              ait_cutobject :
                if SmartAsm then
                 break;
+             ait_eabi_attribute :
+               begin
+                 eabi_section:=ObjData.findsection('.ARM.attributes');
+                 if not(assigned(eabi_section)) then
+                   begin
+                     TmpSection:=ObjData.CurrObjSec;
+                     ObjData.CreateSection(sec_arm_attribute,[],SPB_ARM_ATTRIBUTES,'',secorder_default);
+                     eabi_section:=ObjData.CurrObjSec;
+                     ObjData.setsection(TmpSection);
+                   end;
+                 if eabi_section.Size=0 then
+                   eabi_section.alloc(16);
+                 eabi_section.alloc(LengthUleb128(tai_eabi_attribute(hp).tag));
+                 case tai_eabi_attribute(hp).eattr_typ of
+                   eattrtype_dword:
+                     eabi_section.alloc(LengthUleb128(tai_eabi_attribute(hp).value));
+                   eattrtype_ntbs:
+                     eabi_section.alloc(Length(tai_eabi_attribute(hp).valuestr^)+1);
+                   else
+                     Internalerror(2019100701);
+                 end;
+               end;
+             else
+               ;
            end;
            hp:=Tai(hp.next);
          end;
@@ -1642,6 +1736,7 @@ Implementation
         objsym,
         objsymend : TObjSymbol;
         cpu: tcputype;
+        eabi_section: TObjSection;
       begin
         while assigned(hp) do
          begin
@@ -1653,6 +1748,13 @@ Implementation
                      { here we must determine the fillsize which is used in pass2 }
                      Tai_align_abstract(hp).fillsize:=align(ObjData.CurrObjSec.Size,Tai_align_abstract(hp).aligntype)-
                        ObjData.CurrObjSec.Size;
+
+                     { maximum number of bytes for alignment exeeded? }
+                     if (Tai_align_abstract(hp).aligntype<>Tai_align_abstract(hp).maxbytes) and
+                       (Tai_align_abstract(hp).fillsize>Tai_align_abstract(hp).maxbytes) then
+                       Tai_align_abstract(hp).fillsize:=align(ObjData.CurrObjSec.Size,Byte(Tai_align_abstract(hp).aligntype div 2))-
+                         ObjData.CurrObjSec.Size;
+
                      ObjData.alloc(Tai_align_abstract(hp).fillsize);
                    end;
                end;
@@ -1689,15 +1791,25 @@ Implementation
                    begin
                      objsym:=Objdata.SymbolRef(tai_const(hp).sym);
                      objsymend:=Objdata.SymbolRef(tai_const(hp).endsym);
-                     if objsymend.objsection<>objsym.objsection then
+                     if Tai_const(hp).consttype in [aitconst_gottpoff,aitconst_tlsgd,aitconst_tlsdesc] then
+                       begin
+                         if objsymend.objsection<>ObjData.CurrObjSec then
+                           Internalerror(2019092801);
+                         Tai_const(hp).value:=objsymend.address-ObjData.CurrObjSec.Size+Tai_const(hp).symofs;
+                       end
+                     else if objsymend.objsection<>objsym.objsection then
                        begin
                          if (Tai_const(hp).consttype in [aitconst_uleb128bit,aitconst_sleb128bit]) or
                             (objsym.objsection<>ObjData.CurrObjSec) then
                            internalerror(200905042);
                        end
+{$push} {$R-}{$Q-}
                      else
                        Tai_const(hp).value:=objsymend.address-objsym.address+Tai_const(hp).symofs;
                    end;
+{$pop}
+                 if (Tai_const(hp).consttype in [aitconst_uleb128bit,aitconst_sleb128bit]) then
+                   Tai_const(hp).fixsize;
                  ObjData.alloc(tai_const(hp).size);
                end;
              ait_section:
@@ -1746,6 +1858,14 @@ Implementation
                    asd_code:
                      { ignore for now, but should be added}
                      ;
+                   asd_option:
+                     { ignore for now, but should be added}
+                     ;
+{$ifdef OMFOBJSUPPORT}
+                   asd_omf_linnum_line:
+                     { ignore for now, but should be added}
+                     ;
+{$endif OMFOBJSUPPORT}
                    asd_cpu:
                      begin
                        ObjData.CPUType:=cpu_none;
@@ -1760,6 +1880,25 @@ Implementation
                      internalerror(2010011102);
                  end;
                end;
+             ait_eabi_attribute :
+               begin
+                 eabi_section:=ObjData.findsection('.ARM.attributes');
+                 if not(assigned(eabi_section)) then
+                   Internalerror(2019100702);
+                 if eabi_section.Size=0 then
+                   eabi_section.alloc(16);
+                 eabi_section.alloc(LengthUleb128(tai_eabi_attribute(hp).tag));
+                 case tai_eabi_attribute(hp).eattr_typ of
+                   eattrtype_dword:
+                     eabi_section.alloc(LengthUleb128(tai_eabi_attribute(hp).value));
+                   eattrtype_ntbs:
+                     eabi_section.alloc(Length(tai_eabi_attribute(hp).valuestr^)+1);
+                   else
+                     Internalerror(2019100703);
+                 end;
+               end;
+             else
+               ;
            end;
            hp:=Tai(hp.next);
          end;
@@ -1782,10 +1921,18 @@ Implementation
         ddouble : double;
         {$if defined(cpuextended) and defined(FPC_HAS_TYPE_EXTENDED)}
         eextended : extended;
+	{$else}
+        {$ifdef FPC_SOFT_FPUX80}
+	eextended : floatx80;
+        {$endif}
         {$endif}
         ccomp : comp;
         tmp    : word;
         cpu: tcputype;
+        ddword : dword;
+        eabi_section: TObjSection;
+        s: String;
+        TmpDataPos: TObjSectionOfs;
       begin
         fillchar(zerobuf,sizeof(zerobuf),0);
         fillchar(objsym,sizeof(objsym),0);
@@ -1852,6 +1999,21 @@ Implementation
                        eextended:=extended(tai_realconst(hp).value.s80val);
                        pdata:=@eextended;
                      end;
+         {$else}
+         {$ifdef FPC_SOFT_FPUX80}
+           {$push}{$warn 6018 off} { Unreachable code due to compile time evaluation }
+                   aitrealconst_s80bit:
+                     begin
+		       if sizeof(tai_realconst(hp).value.s80val) = sizeof(double) then
+                         eextended:=float64_to_floatx80(float64(double(tai_realconst(hp).value.s80val)))
+		       else if sizeof(tai_realconst(hp).value.s80val) = sizeof(single) then
+			 eextended:=float32_to_floatx80(float32(single(tai_realconst(hp).value.s80val)))
+		       else
+			 internalerror(2017091901);
+                       pdata:=@eextended;
+                     end;
+           {$pop}
+	 {$endif}
          {$endif cpuextended}
                    aitrealconst_s64comp:
                      begin
@@ -1877,8 +2039,29 @@ Implementation
                      objsym:=Objdata.SymbolRef(tai_const(hp).sym);
                      objsymend:=Objdata.SymbolRef(tai_const(hp).endsym);
                      relative_reloc:=(objsym.objsection<>objsymend.objsection);
-                     Tai_const(hp).value:=objsymend.address-objsym.address+Tai_const(hp).symofs;
+                     if Tai_const(hp).consttype in [aitconst_gottpoff] then
+                       begin
+                         if objsymend.objsection<>ObjData.CurrObjSec then
+                           Internalerror(2019092802);
+                         Tai_const(hp).value:=objsymend.address-ObjData.CurrObjSec.Size+Tai_const(hp).symofs;
+                       end
+                     else if Tai_const(hp).consttype in [aitconst_tlsgd,aitconst_tlsdesc] then
+                       begin
+                         if objsymend.objsection<>ObjData.CurrObjSec then
+                           Internalerror(2019092802);
+                         Tai_const(hp).value:=ObjData.CurrObjSec.Size-objsymend.address+Tai_const(hp).symofs;
+                       end
+                     else if objsymend.objsection<>objsym.objsection then
+                       begin
+                         if (Tai_const(hp).consttype in [aitconst_uleb128bit,aitconst_sleb128bit]) or
+                            (objsym.objsection<>ObjData.CurrObjSec) then
+                           internalerror(2019010301);
+                       end
+                     else
+{$push} {$R-}{$Q-}
+                       Tai_const(hp).value:=objsymend.address-objsym.address+Tai_const(hp).symofs;
                    end;
+{$pop}
                  case tai_const(hp).consttype of
                    aitconst_64bit,
                    aitconst_32bit,
@@ -1931,17 +2114,35 @@ Implementation
 {$ifdef arm}
                    aitconst_got:
                      ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_GOT32);
+{                   aitconst_gottpoff:
+                     ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_TPOFF); }
+                   aitconst_tpoff:
+                     ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_TPOFF);
+                   aitconst_tlsgd:
+                     ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_TLSGD);
+                   aitconst_tlsdesc:
+                     begin
+                       { must be a relative symbol, thus value being valid }
+                       if not(assigned(tai_const(hp).sym)) or not(assigned(tai_const(hp).endsym)) then
+                         Internalerror(2019092904);
+                       ObjData.writereloc(Tai_const(hp).value,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_TLSDESC);
+                     end;
 {$endif arm}
+                   aitconst_dtpoff:
+                     { so far, the size of dtpoff is fixed to 4 bytes }
+                     ObjData.writereloc(Tai_const(hp).symofs,4,Objdata.SymbolRef(tai_const(hp).sym),RELOC_DTPOFF);
                    aitconst_gotoff_symbol:
                      ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_GOTOFF);
                    aitconst_uleb128bit,
                    aitconst_sleb128bit :
                      begin
+                       if Tai_const(hp).fixed_size=0 then
+                         Internalerror(2019030302);
                        if tai_const(hp).consttype=aitconst_uleb128bit then
-                         leblen:=EncodeUleb128(qword(Tai_const(hp).value),lebbuf)
+                         leblen:=EncodeUleb128(qword(Tai_const(hp).value),lebbuf,Tai_const(hp).fixed_size)
                        else
-                         leblen:=EncodeSleb128(Tai_const(hp).value,lebbuf);
-                       if leblen<>tai_const(hp).size then
+                         leblen:=EncodeSleb128(Tai_const(hp).value,lebbuf,Tai_const(hp).fixed_size);
+                       if leblen<>tai_const(hp).fixed_size then
                          internalerror(200709271);
                        ObjData.writebytes(lebbuf,leblen);
                      end;
@@ -1997,6 +2198,18 @@ Implementation
                              break;
                            end;
                      end;
+{$ifdef OMFOBJSUPPORT}
+                   asd_omf_linnum_line:
+                     begin
+                       TOmfObjSection(ObjData.CurrObjSec).LinNumEntries.Add(
+                         TOmfSubRecord_LINNUM_MsLink_Entry.Create(
+                           strtoint(tai_directive(hp).name),
+                           ObjData.CurrObjSec.Size
+                         ));
+                     end;
+{$endif OMFOBJSUPPORT}
+                   else
+                     ;
                  end
                end;
              ait_symbolpair:
@@ -2017,6 +2230,54 @@ Implementation
              ait_seh_directive :
                tai_seh_directive(hp).generate_code(objdata);
 {$endif DISABLE_WIN64_SEH}
+             ait_eabi_attribute :
+               begin
+                 eabi_section:=ObjData.findsection('.ARM.attributes');
+                 if not(assigned(eabi_section)) then
+                   Internalerror(2019100704);
+                 if eabi_section.Size=0 then
+                   begin
+                     s:='A';
+                     eabi_section.write(s[1],1);
+                     ddword:=eabi_section.Size-1;
+                     eabi_section.write(ddword,4);
+                     s:='aeabi'#0;
+                     eabi_section.write(s[1],6);
+                     s:=#1;
+                     eabi_section.write(s[1],1);
+                     ddword:=eabi_section.Size-1-4-6-1;
+                     eabi_section.write(ddword,4);
+                   end;
+                 leblen:=EncodeUleb128(tai_eabi_attribute(hp).tag,lebbuf,0);
+                 eabi_section.write(lebbuf,leblen);
+
+                 case tai_eabi_attribute(hp).eattr_typ of
+                   eattrtype_dword:
+                     begin
+                       leblen:=EncodeUleb128(tai_eabi_attribute(hp).value,lebbuf,0);
+                       eabi_section.write(lebbuf,leblen);
+                     end;
+                   eattrtype_ntbs:
+                     begin
+                       s:=tai_eabi_attribute(hp).valuestr^+#0;
+                       eabi_section.write(s[1],Length(s));
+                     end
+                   else
+                     Internalerror(2019100705);
+                 end;
+                 { update size of attributes section, write directly to the dyn. arrays as
+                   we do not increase the size of section }
+                 TmpDataPos:=eabi_section.Data.Pos;
+                 eabi_section.Data.seek(1);
+                 ddword:=eabi_section.Size-1;
+                 eabi_section.Data.write(ddword,4);
+                 eabi_section.Data.seek(12);
+                 ddword:=eabi_section.Size-1-4-6;
+                 eabi_section.Data.write(ddword,4);
+                 eabi_section.Data.Seek(TmpDataPos);
+               end;
+             else
+               ;
            end;
            hp:=Tai(hp.next);
          end;

+ 115 - 24
compiler/avr/aasmcpu.pas

@@ -108,7 +108,7 @@ uses
 
     { replaces cond. branches by rjmp/jmp and the inverse cond. branch if needed
       and transforms special instructions to valid instruction encodings }
-    procedure finalizeavrcode(list : TAsmList);
+    function finalizeavrcode(list : TAsmList) : Boolean;
 
 implementation
 
@@ -396,15 +396,42 @@ implementation
       end;
 
 
-    procedure finalizeavrcode(list : TAsmList);
+    function finalizeavrcode(list : TAsmList) : Boolean;
       var
         CurrOffset : longint;
-        curtai : tai;
+        curtai, firstinstruction: tai;
         again : boolean;
         l : tasmlabel;
         inasmblock : Boolean;
+
+      procedure remove_instruction;
+        var
+          i: Integer;
+          hp: tai;
+        begin
+          taicpu(firstinstruction).opcode:=A_SLEEP;
+          for i:=0 to taicpu(firstinstruction).opercnt-1 do
+            taicpu(firstinstruction).freeop(i);
+          taicpu(firstinstruction).opercnt:=0;
+          taicpu(firstinstruction).ops:=0;
+          firstinstruction:=tai(firstinstruction.Next);
+          while assigned(firstinstruction) do
+            begin
+              if firstinstruction.typ in [ait_symbol_end,ait_label] then
+                firstinstruction:=tai(firstinstruction.Next)
+              else
+                begin
+                  hp:=tai(firstinstruction.Next);
+                  list.Remove(firstinstruction);
+                  firstinstruction.free;
+                  firstinstruction:=hp;
+                end;
+            end;
+        end;
+
       begin
         again:=true;
+        Result:=true;
         while again do
           begin
             again:=false;
@@ -422,6 +449,8 @@ implementation
                       end;
                     ait_align:
                       inc(CurrOffset,tai_align(curtai).aligntype);
+                    ait_const:
+                      inc(CurrOffset,tai_const(curtai).size);
                     ait_symbolpair,
                     ait_marker:
                       ;
@@ -437,34 +466,96 @@ implementation
 
             curtai:=tai(list.first);
             inasmblock:=false;
+            firstinstruction:=nil;
             while assigned(curtai) do
               begin
                 case curtai.typ of
                   ait_instruction:
-                    case taicpu(curtai).opcode of
-                      A_BRxx:
-                        if (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset>64) or
-                          (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset<-63) then
+                    begin
+                      if not(assigned(firstinstruction)) then
+                        firstinstruction:=curtai;
+                      case taicpu(curtai).opcode of
+                        A_BRxx:
+                          if (taicpu(curtai).oper[0]^.typ=top_ref) and ((taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset>64) or
+                            (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset<-63)) then
+                            begin
+                              if inasmblock then
+                                Message(asmw_e_brxx_out_of_range)
+                              else
+                                begin
+                                  current_asmdata.getjumplabel(l);
+                                  list.insertafter(tai_label.create(l),curtai);
+                                  if CPUAVR_HAS_JMP_CALL in cpu_capabilities[current_settings.cputype] then
+                                    list.insertafter(taicpu.op_sym(A_JMP,taicpu(curtai).oper[0]^.ref^.symbol),curtai)
+                                  else
+                                    list.insertafter(taicpu.op_sym(A_RJMP,taicpu(curtai).oper[0]^.ref^.symbol),curtai);
+                                  taicpu(curtai).oper[0]^.ref^.symbol:=l;
+                                  taicpu(curtai).condition:=inverse_cond(taicpu(curtai).condition);
+                                  again:=true;
+                                end;
+                            end;
+                        A_JMP:
+                          { replace JMP by RJMP? ...
+                            ... but do not mess with asm block }
+                          if not(inasmblock) and (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset<=2048) and
+                          (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset>=-2047) and
+                          { jmps to function go outside the currently considered scope, so do not mess with them.
+                            Those are generated by the peephole optimizer from call/ret sequences }
+                          not(taicpu(curtai).oper[0]^.ref^.symbol.typ=AT_FUNCTION) then
                           begin
-                            current_asmdata.getjumplabel(l);
-                            list.insertafter(tai_label.create(l),curtai);
-                            list.insertafter(taicpu.op_sym(A_JMP,taicpu(curtai).oper[0]^.ref^.symbol),curtai);
-                            taicpu(curtai).oper[0]^.ref^.symbol:=l;
-                            taicpu(curtai).condition:=inverse_cond(taicpu(curtai).condition);
+                            taicpu(curtai).opcode:=A_RJMP;
                             again:=true;
                           end;
-                      A_JMP:
-                        { replace JMP by RJMP? ...
-                          ... but do not mess with asm block }
-                        if not(inasmblock) and (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset<=2048) and
-                        (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset>=-2047) and
-                        { jmps to function go outside the currently considered scope, so do not mess with them.
-                          Those are generated by the peephole optimizer from call/ret sequences }
-                        not(taicpu(curtai).oper[0]^.ref^.symbol.typ=AT_FUNCTION) then
-                        begin
-                          taicpu(curtai).opcode:=A_RJMP;
-                          again:=true;
-                        end;
+                        A_STS:
+                          begin
+                            if current_settings.cputype in [cpu_avrtiny, cpu_avr1] then
+                              with taicpu(curtai).oper[0]^ do
+                                if (ref^.base=NR_NO) and (ref^.index=NR_NO) and (ref^.symbol=nil) and (ref^.offset<$40) then
+                                  begin
+                                    taicpu(curtai).opcode:=A_OUT;
+                                    taicpu(curtai).loadconst(0,ref^.offset);
+                                  end
+                                else if current_settings.cputype=cpu_avr1 then
+                                  begin
+                                    remove_instruction;
+                                    result:=false;
+                                  end;
+                          end;
+                        A_LDS:
+                          begin
+                            if current_settings.cputype in [cpu_avrtiny, cpu_avr1] then
+                              with taicpu(curtai).oper[1]^ do
+                                if (ref^.base=NR_NO) and (ref^.index=NR_NO) and (ref^.symbol=nil) and (ref^.offset<$40) then
+                                  begin
+                                    taicpu(curtai).opcode:=A_IN;
+                                    taicpu(curtai).loadconst(1,ref^.offset)
+                                  end
+                                else if current_settings.cputype=cpu_avr1 then
+                                  begin
+                                    remove_instruction;
+                                    result:=false;
+                                  end;
+                          end;
+                        A_SBIW,
+                        A_MULS,
+                        A_ICALL,
+                        A_IJMP,
+                        A_STD,
+                        A_LD,
+                        A_LDD,
+                        A_ST,
+                        A_ROR,
+                        A_POP,
+                        A_PUSH:
+                          begin
+                            { certain cpu types do not support some instructions, so replace them }
+                            if current_settings.cputype=cpu_avr1 then
+                              begin
+                                remove_instruction;
+                                result:=false;
+                              end;
+                          end;
+                      end;
                     end;
                   ait_marker:
                     case tai_marker(curtai).Kind of

+ 25 - 16
compiler/avr/agavrgas.pas

@@ -75,6 +75,9 @@ unit agavrgas;
 
     Procedure TAVRInstrWriter.WriteInstruction(hp : tai);
 
+      var
+        op: TAsmOp;
+
       function getreferencestring(var ref : treference) : string;
         var
           s : string;
@@ -104,22 +107,26 @@ unit agavrgas;
                     NR_R30:
                       s:=s+'Z';
                     else
-                      s:=gas_regname(base);
+                      s:=s+gas_regname(base);
                   end;
                   if addressmode=AM_POSTINCREMENT then
-                    s:=s+'+';
-
-                  if offset>0 then
-                    s:=s+'+'+tostr(offset)
-                  else if offset<0 then
-                    s:=s+tostr(offset)
+                    s:=s+'+'
+                  else if addressmode = AM_UNCHANGED then
+                    begin
+                      if (offset>0) or ((offset=0) and (op in [A_LDD,A_STD])) then
+                        s:=s+'+'+tostr(offset)
+                      else if offset<0 then
+                        s:=s+tostr(offset);
+                    end;
                 end
               else if assigned(symbol) or (offset<>0) then
                 begin
                   if assigned(symbol) then
                     s:=ReplaceForbiddenAsmSymbolChars(symbol.name);
 
-                  if offset<0 then
+                  if s='' then
+                    s:=tostr(offset)
+                  else if offset<0 then
                     s:=s+tostr(offset)
                   else if offset>0 then
                     s:=s+'+'+tostr(offset);
@@ -135,7 +142,10 @@ unit agavrgas;
                     else
                       s:='('+s+')';
                   end;
-                end;
+                end
+              { reference to address 0? }
+              else if not(assigned(symbol)) and (offset=0) then
+                s:='(0)';
             end;
           getreferencestring:=s;
         end;
@@ -157,9 +167,8 @@ unit agavrgas;
                 begin
                   hs:=ReplaceForbiddenAsmSymbolChars(o.ref^.symbol.name);
                   if o.ref^.offset>0 then
-                   hs:=hs+'+'+tostr(o.ref^.offset)
-                  else
-                   if o.ref^.offset<0 then
+                    hs:=hs+'+'+tostr(o.ref^.offset)
+                  else if o.ref^.offset<0 then
                     hs:=hs+tostr(o.ref^.offset);
                   getopstr:=hs;
                 end
@@ -170,10 +179,10 @@ unit agavrgas;
           end;
         end;
 
-    var op: TAsmOp;
-        s: string;
-        i: byte;
-        sep: string[3];
+    var
+      s: string;
+      i: byte;
+      sep: string[3];
     begin
       op:=taicpu(hp).opcode;
       s:=#9+gas_op2str[op]+cond2str[taicpu(hp).condition];

+ 281 - 152
compiler/avr/aoptcpu.pas

@@ -26,11 +26,11 @@ Unit aoptcpu;
 
 {$i fpcdefs.inc}
 
-{$define DEBUG_AOPTCPU}
+{ $define DEBUG_AOPTCPU}
 
 Interface
 
-uses cpubase, cgbase, aasmtai, aopt,AoptObj, aoptcpub;
+uses cpubase,cgbase,aasmtai,aopt,AoptObj,aoptcpub;
 
 Type
   TCpuAsmOptimizer = class(TAsmOptimizer)
@@ -42,6 +42,8 @@ Type
     function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
     function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
 
+    function InvertSkipInstruction(var p: tai): boolean;
+
     { uses the same constructor as TAopObj }
     function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
     procedure PeepHoleOptPass2;override;
@@ -54,6 +56,7 @@ Implementation
     verbose,
     cpuinfo,
     aasmbase,aasmcpu,aasmdata,
+    aoptutils,
     globals,globtype,
     cgutils;
 
@@ -74,7 +77,9 @@ Implementation
         (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
         (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
         (r1.relsymbol = r2.relsymbol) and
-        (r1.addressmode = r2.addressmode);
+        (r1.addressmode = r2.addressmode) and
+        (r1.volatility=[]) and
+        (r2.volatility=[]);
     end;
 
 
@@ -126,13 +131,6 @@ Implementation
     end;
 
 
-  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
-    begin
-      Result:=(taicpu(instr).ops=2) and
-        (taicpu(instr).oper[0]^.typ=ot0) and
-        (taicpu(instr).oper[1]^.typ=ot1);
-    end;
-
 {$ifdef DEBUG_AOPTCPU}
   procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
     begin
@@ -150,6 +148,10 @@ Implementation
       If (p1.typ = ait_instruction) and (taicpu(p1).opcode in [A_MUL,A_MULS,A_FMUL,A_FMULS,A_FMULSU]) and
               ((getsupreg(reg)=RS_R0) or (getsupreg(reg)=RS_R1)) then
         Result:=true
+      else if (p1.typ = ait_instruction) and (taicpu(p1).opcode=A_MOVW) and
+        ((TRegister(ord(taicpu(p1).oper[0]^.reg)+1)=reg) or (TRegister(ord(taicpu(p1).oper[1]^.reg)+1)=reg) or
+         (taicpu(p1).oper[0]^.reg=reg) or (taicpu(p1).oper[1]^.reg=reg)) then
+        Result:=true
       else
         Result:=inherited RegInInstruction(Reg, p1);
     end;
@@ -206,32 +208,96 @@ Implementation
       if p.opcode in [A_MOV,A_LD,A_LDD,A_LDS,A_LPM,A_LDI,A_MOVW] then
         i:=1;
 
-      while(i<p.ops) do
+      while i<p.ops do
         begin
-          case p.oper[I]^.typ of
+          case p.oper[i]^.typ of
             top_reg:
-              Result := (p.oper[I]^.reg = reg) or
+              Result := (p.oper[i]^.reg = reg) or
                 { MOVW }
                 ((i=1) and (p.opcode=A_MOVW) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
             top_ref:
               Result :=
-                (p.oper[I]^.ref^.base = reg) or
-                (p.oper[I]^.ref^.index = reg);
+                (p.oper[i]^.ref^.base = reg) or
+                (p.oper[i]^.ref^.index = reg);
           end;
           { Bailout if we found something }
           if Result then
             exit;
-          Inc(I);
+          Inc(i);
+        end;
+    end;
+
+
+  {
+    Turns
+      sbis ?
+      jmp .Lx
+      op
+    .Lx:
+
+    Into
+      sbic ?
+      op
+
+    For all types of skip instructions
+  }
+  function TCpuAsmOptimizer.InvertSkipInstruction(var p: tai): boolean;
+
+    function GetNextInstructionWithoutLabel(p: tai; var next: tai): boolean;
+      begin
+        repeat
+          result:=GetNextInstruction(p,next);
+          p:=next;
+        until
+          (not result) or
+          (not assigned(next)) or
+          (next.typ in [ait_instruction]);
+
+        result:=assigned(next) and (next.typ in [ait_instruction]);
+      end;
+
+    var
+      hp1, hp2, hp3: tai;
+      s: string;
+    begin
+      result:=false;
+
+      if GetNextInstruction(taicpu(p),hp1) and
+        (hp1.typ=ait_instruction) and
+        (taicpu(hp1).opcode in [A_RJMP,A_JMP]) and
+        (taicpu(hp1).ops=1) and
+        (taicpu(hp1).oper[0]^.typ=top_ref) and
+        (taicpu(hp1).oper[0]^.ref^.offset=0) and
+        (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
+        GetNextInstructionWithoutLabel(hp1,hp2) and
+        (hp2.typ=ait_instruction) and
+        (not taicpu(hp2).is_jmp) and
+        GetNextInstruction(hp2,hp3) and
+        FindLabel(TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol),hp3) then
+        begin
+          DebugMsg('SkipJump2InvertedSkip', p);
+
+          case taicpu(p).opcode of
+            A_SBIS: taicpu(p).opcode:=A_SBIC;
+            A_SBIC: taicpu(p).opcode:=A_SBIS;
+            A_SBRS: taicpu(p).opcode:=A_SBRC;
+            A_SBRC: taicpu(p).opcode:=A_SBRS;
+          end;
+
+          TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol).decrefs;
+
+          asml.remove(hp1);
+          hp1.free;
         end;
     end;
 
+
   function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
     var
       hp1,hp2,hp3,hp4,hp5: tai;
       alloc, dealloc: tai_regalloc;
       i: integer;
       l: TAsmLabel;
-      TmpUsedRegs : TAllUsedRegs;
     begin
       result := false;
       case p.typ of
@@ -252,12 +318,12 @@ Implementation
               GetNextInstruction(p, hp1) and
               ((MatchInstruction(hp1, A_CP) and
                 (((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
-                  (taicpu(hp1).oper[1]^.reg = NR_R1)) or
+                  (taicpu(hp1).oper[1]^.reg = GetDefaultZeroReg)) or
                  ((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
-                  (taicpu(hp1).oper[0]^.reg = NR_R1) and
+                  (taicpu(hp1).oper[0]^.reg = GetDefaultZeroReg) and
                   (taicpu(p).opcode in [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_EOR,
                                         A_LSL,A_LSR,
-                                        A_OR,A_ORI,A_ROL,A_ROR])))) or
+                                        A_OR,A_ORI,A_ROL,A_ROR,A_SUB,A_SBI])))) or
                (MatchInstruction(hp1, A_CPI) and
                 (taicpu(p).opcode = A_ANDI) and
                 (taicpu(p).oper[1]^.typ=top_const) and
@@ -272,7 +338,9 @@ Implementation
                 EQ = Z=1; NE = Z=0;
                 MI = N=1; PL = N=0; }
               MatchInstruction(hp2, A_BRxx) and
-              (taicpu(hp2).condition in [C_EQ,C_NE,C_MI,C_PL]) { and
+              ((taicpu(hp2).condition in [C_EQ,C_NE,C_MI,C_PL]) or
+              { sub/sbc set all flags }
+               (taicpu(p).opcode in [A_SUB,A_SBI])){ and
               no flag allocation tracking implemented yet on avr
               assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next)))} then
               begin
@@ -288,7 +356,9 @@ Implementation
                 }
 
                 // If we compare to the same value we are masking then invert the comparison
-                if (taicpu(hp1).opcode=A_CPI) then
+                if (taicpu(hp1).opcode=A_CPI) or
+                  { sub/sbc with reverted? }
+                  ((taicpu(hp1).oper[0]^.reg = GetDefaultZeroReg) and (taicpu(p).opcode in [A_SUB,A_SBI])) then
                   taicpu(hp2).condition:=inverse_cond(taicpu(hp2).condition);
 
                 asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
@@ -307,20 +377,23 @@ Implementation
                   begin
                     { turn
                       ldi reg0, imm
-                      cp/mov reg1, reg0
+                      <op> reg1, reg0
                       dealloc reg0
                       into
-                      cpi/ldi reg1, imm
+                      <op>i reg1, imm
                     }
-                    if MatchOpType(p,top_reg,top_const) and
+                    if MatchOpType(taicpu(p),top_reg,top_const) and
                        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
-                       MatchInstruction(hp1,[A_CP,A_MOV],2) and
+                       MatchInstruction(hp1,[A_CP,A_MOV,A_AND,A_SUB],2) and
                        (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
-                       MatchOpType(hp1,top_reg,top_reg) and
+                       MatchOpType(taicpu(hp1),top_reg,top_reg) and
                        (getsupreg(taicpu(hp1).oper[0]^.reg) in [16..31]) and
-                       (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) then
+                       (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
+                       not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) then
                       begin
-                        CopyUsedRegs(TmpUsedRegs);
+                        TransferUsedRegs(TmpUsedRegs);
+                        UpdateUsedRegs(TmpUsedRegs,tai(p.next));
+                        UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
                         if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg, hp1, TmpUsedRegs)) then
                           begin
                             case taicpu(hp1).opcode of
@@ -328,6 +401,10 @@ Implementation
                                 taicpu(hp1).opcode:=A_CPI;
                               A_MOV:
                                 taicpu(hp1).opcode:=A_LDI;
+                              A_AND:
+                                taicpu(hp1).opcode:=A_ANDI;
+                              A_SUB:
+                                taicpu(hp1).opcode:=A_SUBI;
                               else
                                 internalerror(2016111901);
                             end;
@@ -344,16 +421,10 @@ Implementation
                                 dealloc.Free;
                               end;
 
-                            DebugMsg('Peephole LdiMov/Cp2Ldi/Cpi performed', p);
-
-                            GetNextInstruction(p,hp1);
-                            asml.Remove(p);
-                            p.Free;
-                            p:=hp1;
+                            DebugMsg('Peephole LdiOp2Opi performed', p);
 
-                            result:=true;
+                            RemoveCurrentP(p);
                           end;
-                        ReleaseUsedRegs(TmpUsedRegs);
                       end;
                   end;
                 A_STS:
@@ -362,13 +433,20 @@ Implementation
                     (getsupreg(taicpu(p).oper[0]^.ref^.base)=RS_NO) and
                     (getsupreg(taicpu(p).oper[0]^.ref^.index)=RS_NO) and
                     (taicpu(p).oper[0]^.ref^.addressmode=AM_UNCHANGED) and
-                    (taicpu(p).oper[0]^.ref^.offset>=32) and
-                    (taicpu(p).oper[0]^.ref^.offset<=95) then
+                    (((CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype]) and
+                      (taicpu(p).oper[0]^.ref^.offset>=0) and
+                      (taicpu(p).oper[0]^.ref^.offset<=63)) or
+                     (not(CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype]) and
+                      (taicpu(p).oper[0]^.ref^.offset>=32) and
+                      (taicpu(p).oper[0]^.ref^.offset<=95))) then
                     begin
                       DebugMsg('Peephole Sts2Out performed', p);
 
                       taicpu(p).opcode:=A_OUT;
-                      taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset-32);
+                      if CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype] then
+                        taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset)
+                      else
+                        taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset-32);
                     end;
                 A_LDS:
                   if (taicpu(p).oper[1]^.ref^.symbol=nil) and
@@ -376,13 +454,20 @@ Implementation
                     (getsupreg(taicpu(p).oper[1]^.ref^.base)=RS_NO) and
                     (getsupreg(taicpu(p).oper[1]^.ref^.index)=RS_NO) and
                     (taicpu(p).oper[1]^.ref^.addressmode=AM_UNCHANGED) and
-                    (taicpu(p).oper[1]^.ref^.offset>=32) and
-                    (taicpu(p).oper[1]^.ref^.offset<=95) then
+                    (((CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype]) and
+                      (taicpu(p).oper[1]^.ref^.offset>=0) and
+                      (taicpu(p).oper[1]^.ref^.offset<=63)) or
+                     (not(CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype]) and
+                      (taicpu(p).oper[1]^.ref^.offset>=32) and
+                      (taicpu(p).oper[1]^.ref^.offset<=95))) then
                     begin
                       DebugMsg('Peephole Lds2In performed', p);
 
                       taicpu(p).opcode:=A_IN;
-                      taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset-32);
+                      if CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype] then
+                        taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset)
+                      else
+                        taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset-32);
                     end;
                 A_IN:
                     if GetNextInstruction(p,hp1) then
@@ -486,6 +571,46 @@ Implementation
                             result:=true;
                           end;
                       end;
+                A_SBRS,
+                A_SBRC:
+                  begin
+                    {
+                      Turn
+                        in rx, y
+                        sbr* rx, z
+                      Into
+                        sbi* y, z
+                    }
+                    if (taicpu(p).ops=2) and
+                       (taicpu(p).oper[0]^.typ=top_reg) and
+                       assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
+                       GetLastInstruction(p,hp1) and
+                       (hp1.typ=ait_instruction) and
+                       (taicpu(hp1).opcode=A_IN) and
+                       (taicpu(hp1).ops=2) and
+                       (taicpu(hp1).oper[1]^.typ=top_const) and
+                       (taicpu(hp1).oper[1]^.val in [0..31]) and
+                       MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^.reg) and
+                       (not RegModifiedBetween(taicpu(p).oper[0]^.reg, hp1, p)) then
+                      begin
+                        if taicpu(p).opcode=A_SBRS then
+                          taicpu(p).opcode:=A_SBIS
+                        else
+                          taicpu(p).opcode:=A_SBIC;
+
+                        taicpu(p).loadconst(0, taicpu(hp1).oper[1]^.val);
+
+                        DebugMsg('Peephole InSbrx2Sbix performed', p);
+
+                        asml.Remove(hp1);
+                        hp1.free;
+
+                        result:=true;
+                      end;
+
+                    if InvertSkipInstruction(p) then
+                      result:=true;
+                  end;
                 A_ANDI:
                   begin
                     {
@@ -543,15 +668,32 @@ Implementation
                       begin
                         DebugMsg('Redundant Andi removed', p);
 
-                        GetNextInstruction(p,hp1);
+                        result:=RemoveCurrentP(p);
+                      end;
+                  end;
+                A_ADD:
+                  begin
+                    if (taicpu(p).oper[1]^.reg=GetDefaultZeroReg) and
+                    GetNextInstruction(p, hp1) and
+                    MatchInstruction(hp1,A_ADC) then
+                    begin
+                      DebugMsg('Peephole AddAdc2Add performed', p);
 
-                        AsmL.Remove(p);
-                        p.free;
+                      result:=RemoveCurrentP(p);
+                    end;
+                  end;
+                A_SUB:
+                  begin
+                    if (taicpu(p).oper[1]^.reg=GetDefaultZeroReg) and
+                    GetNextInstruction(p, hp1) and
+                    MatchInstruction(hp1,A_SBC) then
+                    begin
+                      DebugMsg('Peephole SubSbc2Sub performed', p);
 
-                        p:=hp1;
+                      taicpu(hp1).opcode:=A_SUB;
 
-                        result:=true;
-                      end;
+                      result:=RemoveCurrentP(p);
+                    end;
                   end;
                 A_CLR:
                   begin
@@ -573,10 +715,7 @@ Implementation
                       begin
                         DebugMsg('Peephole ClrMov2Mov performed', p);
 
-                        asml.Remove(p);
-                        p.Free;
-                        p:=hp1;
-                        result:=true;
+                        result:=RemoveCurrentP(p);
                       end
                     { turn
                       clr rX
@@ -600,7 +739,7 @@ Implementation
                       begin
                         DebugMsg('Peephole ClrAdc2Adc performed', p);
 
-                        taicpu(hp1).oper[1]^.reg:=NR_R1;
+                        taicpu(hp1).oper[1]^.reg:=GetDefaultZeroReg;
 
                         alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
                         dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
@@ -613,12 +752,7 @@ Implementation
                             dealloc.Free;
                           end;
 
-                        GetNextInstruction(p,hp1);
-                        asml.Remove(p);
-                        p.free;
-                        p:=hp1;
-
-                        result:=true;
+                        result:=RemoveCurrentP(p);
                       end;
                   end;
                 A_PUSH:
@@ -648,27 +782,22 @@ Implementation
                        GetNextInstruction(hp2,hp3) and
                        MatchInstruction(hp3,A_POP) then
                       begin
-                       if (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
+                       if (CPUAVR_HAS_MOVW in cpu_capabilities[current_settings.cputype]) and
+                         (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
                          ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
                          (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp3).oper[0]^.reg)+1) and
                          ((getsupreg(taicpu(hp3).oper[0]^.reg) mod 2)=0) then
                          begin
                            DebugMsg('Peephole PushPushPopPop2Movw performed', p);
 
-                           taicpu(p).ops:=2;
-                           taicpu(p).opcode:=A_MOVW;
+                           taicpu(hp3).ops:=2;
+                           taicpu(hp3).opcode:=A_MOVW;
 
-                           taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
-                           taicpu(p).loadreg(0, taicpu(hp3).oper[0]^.reg);
+                           taicpu(hp3).loadreg(1, taicpu(p).oper[0]^.reg);
 
-                           asml.Remove(hp1);
-                           hp1.Free;
-                           asml.Remove(hp2);
-                           hp2.Free;
-                           asml.Remove(hp3);
-                           hp3.Free;
-
-                           result:=true;
+                           RemoveCurrentP(p);
+                           RemoveCurrentP(p);
+                           result:=RemoveCurrentP(p);
                          end
                        else
                          begin
@@ -686,6 +815,17 @@ Implementation
                            taicpu(hp1).loadreg(1, taicpu(hp1).oper[0]^.reg);
                            taicpu(hp1).loadreg(0, taicpu(hp2).oper[0]^.reg);
 
+                           { life range of reg2 and reg3 is increased, fix register allocation entries }
+                           TransferUsedRegs(TmpUsedRegs);
+                           UpdateUsedRegs(TmpUsedRegs,tai(p.Next));
+                           AllocRegBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2,TmpUsedRegs);
+
+                           TransferUsedRegs(TmpUsedRegs);
+                           AllocRegBetween(taicpu(hp3).oper[0]^.reg,p,hp3,TmpUsedRegs);
+
+                           IncludeRegInUsedRegs(taicpu(hp3).oper[0]^.reg,UsedRegs);
+                           UpdateUsedRegs(tai(p.Next));
+
                            asml.Remove(hp2);
                            hp2.Free;
                            asml.Remove(hp3);
@@ -708,27 +848,60 @@ Implementation
                        asml.Remove(hp1);
                        hp1.Free;
 
+                       result:=true;
+                    end;
+                A_RCALL:
+                  if (cs_opt_level4 in current_settings.optimizerswitches) and
+                    GetNextInstruction(p,hp1) and
+                    MatchInstruction(hp1,A_RET) then
+                    begin
+                       DebugMsg('Peephole RCallReg2RJmp performed', p);
+
+                       taicpu(p).opcode:=A_RJMP;
+
+                       asml.Remove(hp1);
+                       hp1.Free;
+
                        result:=true;
                     end;
                 A_MOV:
                   begin
+                    { change
+                      mov reg0, reg1
+                      dealloc reg0
+                      into
+                      dealloc reg0
+                    }
+                    if MatchOpType(taicpu(p),top_reg,top_reg) then
+                      begin
+                        TransferUsedRegs(TmpUsedRegs);
+                        UpdateUsedRegs(TmpUsedRegs,tai(p.Next));
+                        if not(RegInUsedRegs(taicpu(p).oper[0]^.reg,TmpUsedRegs)) and
+                          { reg. allocation information before calls is not perfect, so don't do this before
+                            calls/icalls }
+                          GetNextInstruction(p,hp1) and
+                          not(MatchInstruction(hp1,[A_CALL,A_RCALL])) then
+                          begin
+                            DebugMsg('Peephole Mov2Nop performed', p);
+                            result:=RemoveCurrentP(p);
+                            exit;
+                          end;
+                      end;
+
                     { turn
                       mov reg0, reg1
-                      push reg0
+                      <op> reg2,reg0
                       dealloc reg0
                       into
-                      push reg1
+                      <op> reg2,reg1
                     }
-                    if (taicpu(p).ops=2) and
-                       (taicpu(p).oper[0]^.typ = top_reg) and
-                       (taicpu(p).oper[1]^.typ = top_reg) and
+                    if MatchOpType(taicpu(p),top_reg,top_reg) and
                        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
-                       (hp1.typ = ait_instruction) and
-                       (taicpu(hp1).opcode in [A_PUSH,A_MOV,A_CP,A_CPC,A_ADD,A_SUB,A_ADC,A_SBC,A_EOR,A_AND,A_OR,
-                                               A_STD,A_ST,
-                                               A_OUT,A_IN]) and
-                       RegInInstruction(taicpu(p).oper[0]^.reg, hp1) and
+                       (MatchInstruction(hp1,[A_PUSH,A_MOV,A_CP,A_CPC,A_ADD,A_SUB,A_ADC,A_SBC,A_EOR,A_AND,A_OR,
+                                               A_OUT,A_IN]) or
+                       { the reference register of ST/STD cannot be replaced }
+                       (MatchInstruction(hp1,[A_STD,A_ST,A_STS]) and (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^)))) and
                        (not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1)) and
                        {(taicpu(hp1).ops=1) and
                        (taicpu(hp1).oper[0]^.typ = top_reg) and
@@ -753,11 +926,12 @@ Implementation
                             dealloc.Free;
                           end;
 
-                        GetNextInstruction(p,hp1);
-                        asml.Remove(p);
-                        p.free;
-                        p:=hp1;
-                        result:=true;
+                        { life range of reg1 is increased }
+                        AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);
+                        { p will be removed, update used register as we continue
+                          with the next instruction after p }
+
+                        result:=RemoveCurrentP(p);
                       end
                     { remove
                       mov reg0,reg0
@@ -769,11 +943,7 @@ Implementation
                       begin
                         DebugMsg('Peephole RedundantMov performed', p);
 
-                        GetNextInstruction(p,hp1);
-                        asml.remove(p);
-                        p.free;
-                        p:=hp1;
-                        result:=true;
+                        result:=RemoveCurrentP(p);
                       end
                     {
                       Turn
@@ -784,22 +954,20 @@ Implementation
                         op ry,rz
                     }
                     else if (taicpu(p).ops=2) and
-                       (taicpu(p).oper[0]^.typ = top_reg) and
-                       (taicpu(p).oper[1]^.typ = top_reg) and
+                       MatchOpType(taicpu(p),top_reg,top_reg) and
                        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                        (hp1.typ=ait_instruction) and
                        (taicpu(hp1).ops >= 1) and
                        (taicpu(hp1).oper[0]^.typ = top_reg) and
                        GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and
-                       (hp2.typ=ait_instruction) and
-                       (taicpu(hp2).opcode=A_MOV) and
-                       (taicpu(hp2).oper[0]^.typ = top_reg) and
-                       (taicpu(hp2).oper[1]^.typ = top_reg) and
+                       MatchInstruction(hp2,A_MOV) and
+                       MatchOpType(taicpu(hp2),top_reg,top_reg) and
                        (taicpu(hp2).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
                        (taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and
                        (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
                        (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp2)) and
                        (taicpu(hp1).opcode in [A_ADD,A_ADC,A_SUB,A_SBC,A_AND,A_OR,A_EOR,
+                                               A_INC,A_DEC,
                                                A_LSL,A_LSR,A_ASR,A_ROR,A_ROL]) and
                        assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg, tai(hp2.Next))) then
                       begin
@@ -823,16 +991,10 @@ Implementation
                             dealloc.Free;
                           end;
 
-                        GetNextInstruction(p,hp1);
-
-                        asml.remove(p);
-                        p.free;
                         asml.remove(hp2);
                         hp2.free;
 
-                        p:=hp1;
-
-                        result:=true;
+                        result:=RemoveCurrentP(p);
                       end
                     {
                       Turn
@@ -843,18 +1005,15 @@ Implementation
                         op rw,ry
                     }
                     else if (taicpu(p).ops=2) and
-                       (taicpu(p).oper[0]^.typ = top_reg) and
-                       (taicpu(p).oper[1]^.typ = top_reg) and
+                       MatchOpType(taicpu(p),top_reg,top_reg) and
                        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                        (hp1.typ=ait_instruction) and
                        (taicpu(hp1).ops = 2) and
-                       (taicpu(hp1).oper[0]^.typ = top_reg) and
-                       (taicpu(hp1).oper[1]^.typ = top_reg) and
+                       MatchOpType(taicpu(hp1),top_reg,top_reg) and
                        GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and
                        (hp2.typ=ait_instruction) and
                        (taicpu(hp2).opcode=A_MOV) and
-                       (taicpu(hp2).oper[0]^.typ = top_reg) and
-                       (taicpu(hp2).oper[1]^.typ = top_reg) and
+                       MatchOpType(taicpu(hp2),top_reg,top_reg) and
                        (taicpu(hp2).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
                        (taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and
                        (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
@@ -878,16 +1037,10 @@ Implementation
                             dealloc.Free;
                           end;
 
-                        GetNextInstruction(p,hp1);
+                        result:=RemoveCurrentP(p);
 
-                        asml.remove(p);
-                        p.free;
                         asml.remove(hp2);
                         hp2.free;
-
-                        p:=hp1;
-
-                        result:=true;
                       end
                     { fold
                       mov reg2,reg0
@@ -917,6 +1070,8 @@ Implementation
                           begin
                             asml.Remove(alloc);
                             asml.InsertBefore(alloc,p);
+                            { proper book keeping of currently used registers }
+                            IncludeRegInUsedRegs(taicpu(hp1).oper[0]^.reg,UsedRegs);
                           end;
 
                         taicpu(p).opcode:=A_MOVW;
@@ -929,19 +1084,17 @@ Implementation
                       mov rX,...
                       mov rX,...
                     }
-                    else if (hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_MOV) then
-                      while (hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_MOV) and
+                    else if GetNextInstruction(p,hp1) and MatchInstruction(hp1,A_MOV) then
+                      while MatchInstruction(hp1,A_MOV) and
                             MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
                             { don't remove the first mov if the second is a mov rX,rX }
                             not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) do
                         begin
                           DebugMsg('Peephole MovMov2Mov performed', p);
 
-                          asml.remove(p);
-                          p.free;
-                          p:=hp1;
+                          result:=RemoveCurrentP(p);
+
                           GetNextInstruction(hp1,hp1);
-                          result:=true;
                           if not assigned(hp1) then
                             break;
                         end;
@@ -961,33 +1114,8 @@ Implementation
                           op
                         .L1:
                     }
-                    if GetNextInstruction(p, hp1) and
-                       (hp1.typ=ait_instruction) and
-                       (taicpu(hp1).opcode in [A_JMP,A_RJMP]) and
-                       (taicpu(hp1).ops>0) and
-                       (taicpu(hp1).oper[0]^.typ = top_ref) and
-                       (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
-                       GetNextInstruction(hp1, hp2) and
-                       (hp2.typ=ait_instruction) and
-                       (not taicpu(hp2).is_jmp) and
-                       GetNextInstruction(hp2, hp3) and
-                       (hp3.typ=ait_label) and
-                       (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) then
-                      begin
-                        DebugMsg('Peephole SbiJmp2Sbi performed',p);
-
-                        if taicpu(p).opcode=A_SBIC then
-                          taicpu(p).opcode:=A_SBIS
-                        else
-                          taicpu(p).opcode:=A_SBIC;
-
-                        tai_label(hp3).labsym.decrefs;
-
-                        AsmL.remove(hp1);
-                        taicpu(hp1).Free;
-
-                        result:=true;
-                      end
+                    if InvertSkipInstruction(p) then
+                      result:=true
                     {
                       Turn
                           sbiX X, y
@@ -1055,3 +1183,4 @@ Implementation
 begin
   casmoptimizer:=TCpuAsmOptimizer;
 End.
+

+ 0 - 4
compiler/avr/aoptcpub.pas

@@ -75,10 +75,6 @@ Const
 
   MaxCh = 2;
 
-{ the maximum number of operands an instruction has }
-
-  MaxOps = 2;
-
 {Oper index of operand that contains the source (reference) with a load }
 {instruction                                                            }
 

+ 39 - 35
compiler/avr/avrreg.dat

@@ -2,43 +2,47 @@
 ; AVR registers
 ;
 ; layout
-; <name>,<type>,<value>,<stdname>,<stab idx>,<dwarf idx>
+; <name>,<type>,<subreg>,<value>,<stdname>,<stab idx>,<dwarf idx>
 ;
-NO,$00,$00,INVALID,-1,-1
+NO,$00,$00,$00,INVALID,-1,-1
 
-R0,$01,$00,r0,0,0
-R1,$01,$01,r1,1,1
-R2,$01,$02,r2,2,2
-R3,$01,$03,r3,3,3
-R4,$01,$04,r4,4,4
-R5,$01,$05,r5,5,5
-R6,$01,$06,r6,6,6
-R7,$01,$07,r7,7,7
-R8,$01,$08,r8,8,8
-R9,$01,$09,r9,9,9
-R10,$01,$0a,r10,10,10
-R11,$01,$0b,r11,11,11
-R12,$01,$0c,r12,12,12
-R13,$01,$0d,r13,13,13
-R14,$01,$0e,r14,14,14
-R15,$01,$0f,r15,15,15
-R16,$01,$10,r16,16,16
-R17,$01,$11,r17,17,17
-R18,$01,$12,r18,18,18
-R19,$01,$13,r19,19,19
-R20,$01,$14,r20,20,20
-R21,$01,$15,r21,21,21
-R22,$01,$16,r22,22,22
-R23,$01,$17,r23,23,23
-R24,$01,$18,r24,24,24
-R25,$01,$19,r25,25,25
-R26,$01,$1a,r26,26,26
-R27,$01,$1b,r27,27,27
-R28,$01,$1c,r28,28,28
-R29,$01,$1d,r29,29,29
-R30,$01,$1e,r30,30,30
-R31,$01,$1f,r31,31,31
+R0,$01,$00,$00,r0,0,0
+R1,$01,$00,$01,r1,1,1
+R2,$01,$00,$02,r2,2,2
+R3,$01,$00,$03,r3,3,3
+R4,$01,$00,$04,r4,4,4
+R5,$01,$00,$05,r5,5,5
+R6,$01,$00,$06,r6,6,6
+R7,$01,$00,$07,r7,7,7
+R8,$01,$00,$08,r8,8,8
+R9,$01,$00,$09,r9,9,9
+R10,$01,$00,$0a,r10,10,10
+R11,$01,$00,$0b,r11,11,11
+R12,$01,$00,$0c,r12,12,12
+R13,$01,$00,$0d,r13,13,13
+R14,$01,$00,$0e,r14,14,14
+R15,$01,$00,$0f,r15,15,15
+R16,$01,$00,$10,r16,16,16
+R17,$01,$00,$11,r17,17,17
+R18,$01,$00,$12,r18,18,18
+R19,$01,$00,$13,r19,19,19
+R20,$01,$00,$14,r20,20,20
+R21,$01,$00,$15,r21,21,21
+R22,$01,$00,$16,r22,22,22
+R23,$01,$00,$17,r23,23,23
+R24,$01,$00,$18,r24,24,24
+R25,$01,$00,$19,r25,25,25
+R26,$01,$00,$1a,r26,26,26
+R27,$01,$00,$1b,r27,27,27
+R28,$01,$00,$1c,r28,28,28
+R29,$01,$00,$1d,r29,29,29
+R30,$01,$00,$1e,r30,30,30
+R31,$01,$00,$1f,r31,31,31
 
-SREG,$05,$00,sreg,0,0
+X,$01,$03,$1a,x,26,26
+Y,$01,$03,$1c,y,28,28
+Z,$01,$03,$1e,z,30,30
+
+SREG,$05,$00,$00,sreg,0,0
 
 

+ 22 - 0
compiler/avr/ccpuinnr.inc

@@ -0,0 +1,22 @@
+{
+
+    This file is part of the Free Pascal run time library.
+    Copyright (c) 2016 by the Free Pascal development team.
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+
+  in_avr_cli = in_cpu_first,
+  in_avr_sei = in_cpu_first+1,
+  in_avr_wdr = in_cpu_first+2,
+  in_avr_sleep = in_cpu_first+3,
+  in_avr_nop = in_cpu_first+4,
+  in_avr_save = in_cpu_first+5,
+  in_avr_restore = in_cpu_first+6
+

File diff suppressed because it is too large
+ 374 - 281
compiler/avr/cgcpu.pas


+ 48 - 53
compiler/avr/cpubase.pas

@@ -128,9 +128,6 @@ unit cpubase;
       VOLATILE_INTREGISTERS = [RS_R0,RS_R1,RS_R18..RS_R27,RS_R30,RS_R31];
       VOLATILE_FPUREGISTERS = [];
 
-    type
-      totherregisterset = set of tregisterindex;
-
 {*****************************************************************************
                                 Conditions
 *****************************************************************************}
@@ -158,7 +155,7 @@ unit cpubase;
 
     type
       TResFlags = (F_NotPossible,F_CC,F_CS,F_EQ,F_GE,F_LO,F_LT,
-        F_NE,F_SH,F_VC,F_VS);
+        F_NE,F_SH,F_VC,F_VS,F_PL,F_MI);
 
 {*****************************************************************************
                                 Operands
@@ -171,7 +168,7 @@ unit cpubase;
 *****************************************************************************}
 
     const
-      max_operands = 4;
+      max_operands = 2;
 
       maxintregs = 15;
       maxfpuregs = 0;
@@ -232,8 +229,8 @@ unit cpubase;
 *****************************************************************************}
 
       { Stack pointer register }
-      NR_STACK_POINTER_REG = NR_R13;
-      RS_STACK_POINTER_REG = RS_R13;
+      NR_STACK_POINTER_REG = NR_INVALID;
+      RS_STACK_POINTER_REG = RS_INVALID;
       { Frame pointer register }
       RS_FRAME_POINTER_REG = RS_R28;
       NR_FRAME_POINTER_REG = NR_R28;
@@ -278,17 +275,6 @@ unit cpubase;
 *****************************************************************************}
 
     const
-      { Registers which must be saved when calling a routine declared as
-        cppdecl, cdecl, stdcall, safecall, palmossyscall. The registers
-        saved should be the ones as defined in the target ABI and / or GCC.
-
-        This value can be deduced from the CALLED_USED_REGISTERS array in the
-        GCC source.
-      }
-      { on avr, gen_entry/gen_exit code saves/restores registers, so
-        we don't need this array }
-      saved_standard_registers : array[0..0] of tsuperregister =
-        (RS_INVALID);
       { Required parameter alignment when calling a routine declared as
         stdcall and cdecl. The alignment value should be the one defined
         by GCC or the target ABI.
@@ -298,9 +284,6 @@ unit cpubase;
       }
       std_param_align = 4;
 
-      saved_address_registers : array[0..0] of tsuperregister = (RS_INVALID);
-      saved_mm_registers : array[0..0] of tsuperregister = (RS_INVALID);
-
 {*****************************************************************************
                                   Helpers
 *****************************************************************************}
@@ -317,22 +300,19 @@ unit cpubase;
     function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function conditions_equal(const c1, c2: TAsmCond): boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
-    function dwarf_reg(r:tregister):byte;
-    function GetHigh(const r : TRegister) : TRegister;
-
-    { returns the next virtual register }
-    function GetNextReg(const r : TRegister) : TRegister;
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal" }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
 
-    { returns the last virtual register }
-    function GetLastReg(const r : TRegister) : TRegister;
+    function dwarf_reg(r:tregister):byte;
+    function dwarf_reg_no_error(r:tregister):shortint;
+    function eh_return_data_regno(nr: longint): longint;
 
-    { returns the register with the offset of ofs of a continuous set of register starting with r }
-    function GetOffsetReg(const r : TRegister;ofs : shortint) : TRegister;
-    { returns the register with the offset of ofs of a continuous set of register starting with r and being continued with rhi }
-    function GetOffsetReg64(const r,rhi: TRegister;ofs : shortint): TRegister;
 
     function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
 
+    function GetDefaultTmpReg : TRegister;
+    function GetDefaultZeroReg : TRegister;
+
   implementation
 
     uses
@@ -376,7 +356,7 @@ unit cpubase;
       const
         inv_flags: array[TResFlags] of TResFlags =
           (F_NotPossible,F_CS,F_CC,F_NE,F_LT,F_SH,F_GE,
-           F_NE,F_LO,F_VS,F_VC);
+           F_NE,F_LO,F_VS,F_VC,F_MI,F_PL);
       begin
         f:=inv_flags[f];
       end;
@@ -384,9 +364,9 @@ unit cpubase;
 
     function flags_to_cond(const f: TResFlags) : TAsmCond;
       const
-        flag_2_cond: array[F_CC..F_VS] of TAsmCond =
+        flag_2_cond: array[F_CC..F_MI] of TAsmCond =
           (C_CC,C_CS,C_EQ,C_GE,C_LO,C_LT,
-           C_NE,C_SH,C_VC,C_VS);
+           C_NE,C_SH,C_VC,C_VS,C_PL,C_MI);
       begin
         if f=F_NotPossible then
           internalerror(2011022101);
@@ -436,6 +416,24 @@ unit cpubase;
       end;
 
 
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal" }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+      begin
+        Result := (c = C_None) or conditions_equal(Subset, c);
+
+        { Please update as necessary. [Kit] }
+        if not Result then
+          case Subset of
+            C_EQ:
+              Result := (c in [C_GE]);
+            C_LT:
+              Result := (c in [C_NE]);
+            else
+              Result := False;
+          end;
+      end;
+
+
     function rotl(d : dword;b : byte) : dword;
       begin
          result:=(d shr (32-b)) or (d shl b);
@@ -453,42 +451,39 @@ unit cpubase;
       end;
 
 
-    function GetHigh(const r : TRegister) : TRegister;
-      begin
-        result:=TRegister(longint(r)+1)
-      end;
-
-
-    function GetNextReg(const r: TRegister): TRegister;
+    function dwarf_reg_no_error(r:tregister):shortint;
       begin
-        result:=TRegister(longint(r)+1);
+        result:=regdwarf_table[findreg_by_number(r)];
       end;
 
 
-    function GetLastReg(const r: TRegister): TRegister;
+    function eh_return_data_regno(nr: longint): longint;
       begin
-        result:=TRegister(longint(r)-1);
+        result:=-1;
       end;
 
 
-    function GetOffsetReg(const r: TRegister;ofs : shortint): TRegister;
+    function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
       begin
-        result:=TRegister(longint(r)+ofs);
+        is_calljmp:= o in call_jmp_instructions;
       end;
 
 
-    function GetOffsetReg64(const r,rhi: TRegister;ofs : shortint): TRegister;
+    function GetDefaultTmpReg: TRegister;
       begin
-        if ofs>3 then
-          result:=TRegister(longint(rhi)+ofs-4)
+        if CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype] then
+          Result:=NR_R16
         else
-          result:=TRegister(longint(r)+ofs);
+          Result:=NR_R0;
       end;
 
 
-    function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
+    function GetDefaultZeroReg: TRegister;
       begin
-        is_calljmp:= o in call_jmp_instructions;
+        if CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype] then
+          Result:=NR_R17
+        else
+          Result:=NR_R1;
       end;
 
 

+ 445 - 287
compiler/avr/cpuinfo.pas

@@ -14,6 +14,8 @@
 
 Unit CPUInfo;
 
+{$i fpcdefs.inc}
+
 Interface
 
   uses
@@ -35,6 +37,7 @@ Type
    { possible supported processors for this target }
    tcputype =
       (cpu_none,
+       cpu_avrtiny,
        cpu_avr1,
        cpu_avr2,
        cpu_avr25,
@@ -44,13 +47,14 @@ Type
        cpu_avr4,
        cpu_avr5,
        cpu_avr51,
-       cpu_avr6
+       cpu_avr6,
+       cpu_avrxmega3
       );
 
    tfputype =
      (fpu_none,
       fpu_soft,
-      fp_libgcc
+      fpu_libgcc
      );
 
    tcontrollertype =
@@ -58,150 +62,225 @@ Type
 
       ct_avrsim,
 
-      ct_atmega645,
-      ct_atmega165a,
-      ct_attiny44a,
-      ct_atmega649a,
-      ct_atmega32u4,
-      ct_attiny26,
-      ct_at90usb1287,
+      ct_at90can32,
+      ct_at90can64,
+      ct_at90can128,
+      ct_at90pwm1,
+      ct_at90pwm2b,
+      ct_at90pwm3b,
+      ct_at90pwm81,
       ct_at90pwm161,
-      ct_attiny48,
-      ct_atmega168p,
-      ct_attiny10,
-      ct_attiny84a,
+      ct_at90pwm216,
+      ct_at90pwm316,
       ct_at90usb82,
-      ct_attiny2313,
-      ct_attiny461,
-      ct_atmega3250pa,
-      ct_atmega3290a,
-      ct_atmega165p,
-      ct_attiny43u,
       ct_at90usb162,
-      ct_atmega16u4,
-      ct_attiny24a,
-      ct_atmega88p,
-      ct_attiny88,
-      ct_atmega6490p,
-      ct_attiny40,
-      ct_atmega324p,
-      ct_attiny167,
-      ct_atmega328,
-      ct_attiny861,
-      ct_attiny85,
-      ct_atmega64m1,
-      ct_atmega645p,
-      ct_atmega8u2,
-      ct_atmega329a,
-      ct_atmega8a,
-      ct_atmega324pa,
-      ct_atmega32hvb,
-      ct_at90pwm316,
-      ct_at90pwm3b,
       ct_at90usb646,
-      ct_attiny20,
-      ct_atmega16,
-      ct_atmega48a,
-      ct_attiny24,
-      ct_atmega644,
-      ct_atmega1284,
+      ct_at90usb647,
+      ct_at90usb1286,
+      ct_at90usb1287,
       ct_ata6285,
-      ct_at90can64,
-      ct_atmega48,
-      ct_at90can32,
-      ct_attiny9,
-      ct_attiny87,
-      ct_atmega1281,
-      ct_at90pwm216,
-      ct_atmega3250a,
-      ct_atmega88a,
-      ct_atmega128rfa1,
-      ct_atmega3290pa,
-      ct_at90pwm81,
-      ct_atmega325p,
-      ct_attiny84,
-      ct_atmega328p,
-      ct_attiny13a,
+      ct_ata6286,
       ct_atmega8,
-      ct_atmega1284p,
+      ct_atmega8a,
+      ct_atmega8hva,
+      ct_atmega8u2,
+      ct_atmega16,
+      ct_atmega16a,
+      ct_atmega16hva,
+      ct_atmega16hvb,
+      ct_atmega16hvbrevb,
+      ct_atmega16m1,
       ct_atmega16u2,
-      ct_attiny45,
-      ct_atmega3250,
-      ct_atmega329,
+      ct_atmega16u4,
+      ct_atmega32,
       ct_atmega32a,
-      ct_attiny5,
-      ct_at90can128,
-      ct_atmega6490,
-      ct_atmega8515,
+      ct_atmega32c1,
+      ct_atmega32hvb,
+      ct_atmega32hvbrevb,
+      ct_atmega32m1,
+      ct_atmega32u2,
+      ct_atmega32u4,
+      ct_atmega48,
+      ct_atmega48a,
+      ct_atmega48p,
+      ct_atmega48pa,
+      ct_atmega48pb,
+      ct_atmega64,
+      ct_atmega64a,
+      ct_atmega64c1,
+      ct_atmega64hve2,
+      ct_atmega64m1,
+      ct_atmega64rfr2,
+      ct_atmega88,
+      ct_atmega88a,
+      ct_atmega88p,
       ct_atmega88pa,
-      ct_atmega168a,
+      ct_atmega88pb,
       ct_atmega128,
-      ct_at90usb1286,
-      ct_atmega164pa,
-      ct_attiny828,
-      ct_atmega88,
-      ct_atmega645a,
-      ct_atmega3290p,
-      ct_atmega644p,
-      ct_atmega164a,
-      ct_attiny4313,
-      ct_atmega162,
-      ct_atmega32c1,
       ct_atmega128a,
-      ct_atmega324a,
-      ct_attiny13,
-      ct_atmega2561,
+      ct_atmega128rfa1,
+      ct_atmega128rfr2,
+      ct_atmega162,
+      ct_atmega164a,
+      ct_atmega164p,
+      ct_atmega164pa,
+      ct_atmega165a,
+      ct_atmega165p,
+      ct_atmega165pa,
+      ct_atmega168,
+      ct_atmega168a,
+      ct_atmega168p,
+      ct_atmega168pa,
+      ct_atmega168pb,
       ct_atmega169a,
-      ct_attiny261,
-      ct_atmega644a,
-      ct_atmega3290,
-      ct_atmega64a,
       ct_atmega169p,
-      ct_atmega2560,
-      ct_atmega32,
-      ct_attiny861a,
-      ct_attiny28,
-      ct_atmega48p,
-      ct_atmega8535,
-      ct_atmega168pa,
-      ct_atmega16m1,
-      ct_atmega16hvb,
-      ct_atmega164p,
+      ct_atmega169pa,
+      ct_atmega256rfr2,
+      ct_atmega324a,
+      ct_atmega324p,
+      ct_atmega324pa,
+      ct_atmega324pb,
+      ct_atmega325,
       ct_atmega325a,
+      ct_atmega325p,
+      ct_atmega325pa,
+      ct_atmega328,
+      ct_atmega328p,
+      ct_atmega328pb,
+      ct_atmega329,
+      ct_atmega329a,
+      ct_atmega329p,
+      ct_atmega329pa,
+      ct_atmega406,
       ct_atmega640,
+      ct_atmega644,
+      ct_atmega644a,
+      ct_atmega644p,
+      ct_atmega644pa,
+      ct_atmega644rfr2,
+      ct_atmega645,
+      ct_atmega645a,
+      ct_atmega645p,
+      ct_atmega649,
+      ct_atmega649a,
+      ct_atmega649p,
+      ct_atmega808,
+      ct_atmega809,
+      ct_atmega1280,
+      ct_atmega1281,
+      ct_atmega1284,
+      ct_atmega1284p,
+      ct_atmega1284rfr2,
+      ct_atmega1608,
+      ct_atmega1609,
+      ct_atmega2560,
+      ct_atmega2561,
+      ct_atmega2564rfr2,
+      ct_atmega3208,
+      ct_atmega3209,
+      ct_atmega3250,
+      ct_atmega3250a,
+      ct_atmega3250p,
+      ct_atmega3250pa,
+      ct_atmega3290,
+      ct_atmega3290a,
+      ct_atmega3290p,
+      ct_atmega3290pa,
+      ct_atmega4808,
+      ct_atmega4809,
       ct_atmega6450,
-      ct_atmega329p,
-      ct_ata6286,
-      ct_at90usb647,
-      ct_atmega168,
+      ct_atmega6450a,
+      ct_atmega6450p,
+      ct_atmega6490,
       ct_atmega6490a,
-      ct_atmega32m1,
-      ct_atmega64c1,
-      ct_atmega32u2,
+      ct_atmega6490p,
+      ct_atmega8515,
+      ct_atmega8535,
       ct_attiny4,
-      ct_atmega644pa,
-      ct_at90pwm1,
+      ct_attiny5,
+      ct_attiny9,
+      ct_attiny10,
+      ct_attiny11,
+      ct_attiny12,
+      ct_attiny13,
+      ct_attiny13a,
+      ct_attiny15,
+      ct_attiny20,
+      ct_attiny24,
+      ct_attiny24a,
+      ct_attiny25,
+      ct_attiny26,
+      ct_attiny28,
+      ct_attiny40,
+      ct_attiny43u,
       ct_attiny44,
-      ct_atmega325pa,
-      ct_atmega6450a,
-      ct_attiny2313a,
-      ct_atmega329pa,
+      ct_attiny44a,
+      ct_attiny45,
+      ct_attiny48,
+      ct_attiny84,
+      ct_attiny84a,
+      ct_attiny85,
+      ct_attiny87,
+      ct_attiny88,
+      ct_attiny102,
+      ct_attiny104,
+      ct_attiny167,
+      ct_attiny202,
+      ct_attiny204,
+      ct_attiny212,
+      ct_attiny214,
+      ct_attiny261,
+      ct_attiny261a,
+      ct_attiny402,
+      ct_attiny404,
+      ct_attiny406,
+      ct_attiny412,
+      ct_attiny414,
+      ct_attiny416,
+      ct_attiny416auto,
+      ct_attiny417,
+      ct_attiny441,
+      ct_attiny461,
       ct_attiny461a,
-      ct_atmega6450p,
-      ct_atmega64,
-      ct_atmega165pa,
-      ct_atmega16a,
-      ct_atmega649,
-      ct_atmega1280,
-      ct_at90pwm2b,
-      ct_atmega649p,
-      ct_atmega3250p,
-      ct_atmega48pa,
+      ct_attiny804,
+      ct_attiny806,
+      ct_attiny807,
+      ct_attiny814,
+      ct_attiny816,
+      ct_attiny817,
+      ct_attiny828,
+      ct_attiny841,
+      ct_attiny861,
+      ct_attiny861a,
+      ct_attiny1604,
+      ct_attiny1606,
+      ct_attiny1607,
+      ct_attiny1614,
+      ct_attiny1616,
+      ct_attiny1617,
+      ct_attiny1624,
+      ct_attiny1626,
+      ct_attiny1627,
       ct_attiny1634,
-      ct_atmega325,
-      ct_atmega169pa,
-      ct_attiny261a,
-      ct_attiny25
+      ct_attiny2313,
+      ct_attiny2313a,
+      ct_attiny3214,
+      ct_attiny3216,
+      ct_attiny3217,
+      ct_attiny4313,
+      // Controller board aliases
+      ct_arduinoleonardo,
+      ct_arduinomega,
+      ct_arduinomicro,
+      ct_arduinonano,
+      ct_arduinonanoevery,
+      ct_arduinouno,
+      ct_atmega256rfr2xpro,
+      ct_atmega324pbxpro,
+      ct_atmega1284pxplained,
+      ct_atmega4809xpro,
+      ct_attiny817xpro,
+      ct_attiny3217xpro
      );
 
    tcontrollerdatatype = record
@@ -216,8 +295,6 @@ Const
    ControllerSupport = true;
    {# Size of native extended floating point type }
    extended_size = 12;
-   {# Size of a multimedia register               }
-   mmreg_size = 16;
    { target cpu string (used by compiler options) }
    target_cpu_string = 'avr';
 
@@ -234,7 +311,8 @@ Const
      pocall_softfloat
    ];
 
-   cputypestr : array[tcputype] of string[5] = ('',
+   cputypestr : array[tcputype] of string[9] = ('',
+     'AVRTINY',
      'AVR1',
      'AVR2',
      'AVR25',
@@ -244,7 +322,8 @@ Const
      'AVR4',
      'AVR5',
      'AVR51',
-     'AVR6'
+     'AVR6',
+     'AVRXMEGA3'
    );
 
    fputypestr : array[tfputype] of string[6] = (
@@ -282,150 +361,225 @@ Const
         eeprombase:0;
         eepromsize:4096;
         )
-        ,(controllertypestr:'ATMEGA645'; controllerunitstr:'ATMEGA645'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA165A'; controllerunitstr:'ATMEGA165A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY44A'; controllerunitstr:'ATTINY44A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA649A'; controllerunitstr:'ATMEGA649A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA32U4'; controllerunitstr:'ATMEGA32U4'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2560; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY26'; controllerunitstr:'ATTINY26'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'AT90USB1287'; controllerunitstr:'AT90USB1287'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'AT90PWM161'; controllerunitstr:'AT90PWM161'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY48'; controllerunitstr:'ATTINY48'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:256; sramsize:256; eeprombase:0; eepromsize:64)
-        ,(controllertypestr:'ATMEGA168P'; controllerunitstr:'ATMEGA168P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY10'; controllerunitstr:'ATTINY10'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:1024; srambase:64; sramsize:32; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATTINY84A'; controllerunitstr:'ATTINY84A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'AT90USB82'; controllerunitstr:'AT90USB82'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY2313'; controllerunitstr:'ATTINY2313'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATTINY461'; controllerunitstr:'ATTINY461'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA3250PA'; controllerunitstr:'ATMEGA3250PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA3290A'; controllerunitstr:'ATMEGA3290A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA165P'; controllerunitstr:'ATMEGA165P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY43U'; controllerunitstr:'ATTINY43U'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:64)
-        ,(controllertypestr:'AT90USB162'; controllerunitstr:'AT90USB162'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA16U4'; controllerunitstr:'ATMEGA16U4'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1280; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY24A'; controllerunitstr:'ATTINY24A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATMEGA88P'; controllerunitstr:'ATMEGA88P'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY88'; controllerunitstr:'ATTINY88'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:64)
-        ,(controllertypestr:'ATMEGA6490P'; controllerunitstr:'ATMEGA6490P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATTINY40'; controllerunitstr:'ATTINY40'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:64; sramsize:256; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATMEGA324P'; controllerunitstr:'ATMEGA324P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY167'; controllerunitstr:'ATTINY167'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA328'; controllerunitstr:'ATMEGA328'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY861'; controllerunitstr:'ATTINY861'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY85'; controllerunitstr:'ATTINY85'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA64M1'; controllerunitstr:'ATMEGA64M1'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA645P'; controllerunitstr:'ATMEGA645P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA8U2'; controllerunitstr:'ATMEGA8U2'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA329A'; controllerunitstr:'ATMEGA329A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA8A'; controllerunitstr:'ATMEGA8A'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA324PA'; controllerunitstr:'ATMEGA324PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA32HVB'; controllerunitstr:'ATMEGA32HVB'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'AT90PWM316'; controllerunitstr:'AT90PWM316'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'AT90PWM3B'; controllerunitstr:'AT90PWM3B'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'AT90USB646'; controllerunitstr:'AT90USB646'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATTINY20'; controllerunitstr:'ATTINY20'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:64; sramsize:128; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATMEGA16'; controllerunitstr:'ATMEGA16'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:96; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA48A'; controllerunitstr:'ATMEGA48A'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:256; sramsize:512; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATTINY24'; controllerunitstr:'ATTINY24'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATMEGA644'; controllerunitstr:'ATMEGA644'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA1284'; controllerunitstr:'ATMEGA1284'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:16384; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATA6285'; controllerunitstr:'ATA6285'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:320)
-        ,(controllertypestr:'AT90CAN64'; controllerunitstr:'AT90CAN64'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA48'; controllerunitstr:'ATMEGA48'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:256; sramsize:512; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'AT90CAN32'; controllerunitstr:'AT90CAN32'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY9'; controllerunitstr:'ATTINY9'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:1024; srambase:64; sramsize:32; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATTINY87'; controllerunitstr:'ATTINY87'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA1281'; controllerunitstr:'ATMEGA1281'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'AT90PWM216'; controllerunitstr:'AT90PWM216'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA3250A'; controllerunitstr:'ATMEGA3250A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA88A'; controllerunitstr:'ATMEGA88A'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA128RFA1'; controllerunitstr:'ATMEGA128RFA1'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:512; sramsize:16384; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA3290PA'; controllerunitstr:'ATMEGA3290PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'AT90PWM81'; controllerunitstr:'AT90PWM81'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:256; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA325P'; controllerunitstr:'ATMEGA325P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY84'; controllerunitstr:'ATTINY84'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA328P'; controllerunitstr:'ATMEGA328P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY13A'; controllerunitstr:'ATTINY13A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:1024; srambase:96; sramsize:64; eeprombase:0; eepromsize:64)
-        ,(controllertypestr:'ATMEGA8'; controllerunitstr:'ATMEGA8'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA1284P'; controllerunitstr:'ATMEGA1284P'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:16384; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA16U2'; controllerunitstr:'ATMEGA16U2'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY45'; controllerunitstr:'ATTINY45'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA3250'; controllerunitstr:'ATMEGA3250'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA329'; controllerunitstr:'ATMEGA329'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA32A'; controllerunitstr:'ATMEGA32A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:96; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY5'; controllerunitstr:'ATTINY5'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:512; srambase:64; sramsize:32; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'AT90CAN128'; controllerunitstr:'AT90CAN128'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:4096; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA6490'; controllerunitstr:'ATMEGA6490'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA8515'; controllerunitstr:'ATMEGA8515'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA88PA'; controllerunitstr:'ATMEGA88PA'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA168A'; controllerunitstr:'ATMEGA168A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA128'; controllerunitstr:'ATMEGA128'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:4096; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'AT90USB1286'; controllerunitstr:'AT90USB1286'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA164PA'; controllerunitstr:'ATMEGA164PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY828'; controllerunitstr:'ATTINY828'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA88'; controllerunitstr:'ATMEGA88'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA645A'; controllerunitstr:'ATMEGA645A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA3290P'; controllerunitstr:'ATMEGA3290P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA644P'; controllerunitstr:'ATMEGA644P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA164A'; controllerunitstr:'ATMEGA164A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY4313'; controllerunitstr:'ATTINY4313'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA162'; controllerunitstr:'ATMEGA162'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA32C1'; controllerunitstr:'ATMEGA32C1'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA128A'; controllerunitstr:'ATMEGA128A'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:4096; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA324A'; controllerunitstr:'ATMEGA324A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY13'; controllerunitstr:'ATTINY13'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:1024; srambase:96; sramsize:64; eeprombase:0; eepromsize:64)
-        ,(controllertypestr:'ATMEGA2561'; controllerunitstr:'ATMEGA2561'; cputype: cpu_avr6; fputype:fpu_soft; flashbase:0; flashsize:262144; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA169A'; controllerunitstr:'ATMEGA169A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY261'; controllerunitstr:'ATTINY261'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATMEGA644A'; controllerunitstr:'ATMEGA644A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA3290'; controllerunitstr:'ATMEGA3290'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA64A'; controllerunitstr:'ATMEGA64A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA169P'; controllerunitstr:'ATMEGA169P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA2560'; controllerunitstr:'ATMEGA2560'; cputype: cpu_avr6; fputype:fpu_soft; flashbase:0; flashsize:262144; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA32'; controllerunitstr:'ATMEGA32'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:96; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY861A'; controllerunitstr:'ATTINY861A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY28'; controllerunitstr:'ATTINY28'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:0; sramsize:0; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATMEGA48P'; controllerunitstr:'ATMEGA48P'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:256; sramsize:512; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA8535'; controllerunitstr:'ATMEGA8535'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA168PA'; controllerunitstr:'ATMEGA168PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA16M1'; controllerunitstr:'ATMEGA16M1'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA16HVB'; controllerunitstr:'ATMEGA16HVB'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA164P'; controllerunitstr:'ATMEGA164P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA325A'; controllerunitstr:'ATMEGA325A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA640'; controllerunitstr:'ATMEGA640'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA6450'; controllerunitstr:'ATMEGA6450'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA329P'; controllerunitstr:'ATMEGA329P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATA6286'; controllerunitstr:'ATA6286'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:320)
-        ,(controllertypestr:'AT90USB647'; controllerunitstr:'AT90USB647'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA168'; controllerunitstr:'ATMEGA168'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA6490A'; controllerunitstr:'ATMEGA6490A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA32M1'; controllerunitstr:'ATMEGA32M1'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA64C1'; controllerunitstr:'ATMEGA64C1'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA32U2'; controllerunitstr:'ATMEGA32U2'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:1024; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY4'; controllerunitstr:'ATTINY4'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:512; srambase:64; sramsize:32; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATMEGA644PA'; controllerunitstr:'ATMEGA644PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'AT90PWM1'; controllerunitstr:'AT90PWM1'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY44'; controllerunitstr:'ATTINY44'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA325PA'; controllerunitstr:'ATMEGA325PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA6450A'; controllerunitstr:'ATMEGA6450A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATTINY2313A'; controllerunitstr:'ATTINY2313A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATMEGA329PA'; controllerunitstr:'ATMEGA329PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY461A'; controllerunitstr:'ATTINY461A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA6450P'; controllerunitstr:'ATMEGA6450P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA64'; controllerunitstr:'ATMEGA64'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA165PA'; controllerunitstr:'ATMEGA165PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA16A'; controllerunitstr:'ATMEGA16A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:96; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA649'; controllerunitstr:'ATMEGA649'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA1280'; controllerunitstr:'ATMEGA1280'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'AT90PWM2B'; controllerunitstr:'AT90PWM2B'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA649P'; controllerunitstr:'ATMEGA649P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA3250P'; controllerunitstr:'ATMEGA3250P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA48PA'; controllerunitstr:'ATMEGA48PA'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:256; sramsize:512; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATTINY1634'; controllerunitstr:'ATTINY1634'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA325'; controllerunitstr:'ATMEGA325'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA169PA'; controllerunitstr:'ATMEGA169PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY261A'; controllerunitstr:'ATTINY261A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATTINY25'; controllerunitstr:'ATTINY25'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
+        ,(controllertypestr:'AT90CAN32';controllerunitstr:'AT90CAN32';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'AT90CAN64';controllerunitstr:'AT90CAN64';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'AT90CAN128';controllerunitstr:'AT90CAN128';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:4096;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'AT90PWM1';controllerunitstr:'AT90PWM1';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM2B';controllerunitstr:'AT90PWM2B';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM3B';controllerunitstr:'AT90PWM3B';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM81';controllerunitstr:'AT90PWM81';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:256;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM161';controllerunitstr:'AT90PWM161';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM216';controllerunitstr:'AT90PWM216';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM316';controllerunitstr:'AT90PWM316';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90USB82';controllerunitstr:'AT90USB82';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90USB162';controllerunitstr:'AT90USB162';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90USB646';controllerunitstr:'AT90USB646';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'AT90USB647';controllerunitstr:'AT90USB647';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'AT90USB1286';controllerunitstr:'AT90USB1286';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'AT90USB1287';controllerunitstr:'AT90USB1287';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATA6285';controllerunitstr:'ATA6285';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:320)
+        ,(controllertypestr:'ATA6286';controllerunitstr:'ATA6286';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:320)
+        ,(controllertypestr:'ATMEGA8';controllerunitstr:'ATMEGA8';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA8A';controllerunitstr:'ATMEGA8A';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA8HVA';controllerunitstr:'ATMEGA8HVA';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA8U2';controllerunitstr:'ATMEGA8U2';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16';controllerunitstr:'ATMEGA16';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:96;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16A';controllerunitstr:'ATMEGA16A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:96;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16HVA';controllerunitstr:'ATMEGA16HVA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA16HVB';controllerunitstr:'ATMEGA16HVB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16HVBREVB';controllerunitstr:'ATMEGA16HVBREVB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16M1';controllerunitstr:'ATMEGA16M1';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16U2';controllerunitstr:'ATMEGA16U2';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16U4';controllerunitstr:'ATMEGA16U4';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1280;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA32';controllerunitstr:'ATMEGA32';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:96;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32A';controllerunitstr:'ATMEGA32A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:96;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32C1';controllerunitstr:'ATMEGA32C1';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32HVB';controllerunitstr:'ATMEGA32HVB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32HVBREVB';controllerunitstr:'ATMEGA32HVBREVB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32M1';controllerunitstr:'ATMEGA32M1';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32U2';controllerunitstr:'ATMEGA32U2';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:1024;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32U4';controllerunitstr:'ATMEGA32U4';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2560;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA48';controllerunitstr:'ATMEGA48';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA48A';controllerunitstr:'ATMEGA48A';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA48P';controllerunitstr:'ATMEGA48P';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA48PA';controllerunitstr:'ATMEGA48PA';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA48PB';controllerunitstr:'ATMEGA48PB';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA64';controllerunitstr:'ATMEGA64';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA64A';controllerunitstr:'ATMEGA64A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA64C1';controllerunitstr:'ATMEGA64C1';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA64HVE2';controllerunitstr:'ATMEGA64HVE2';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA64M1';controllerunitstr:'ATMEGA64M1';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA64RFR2';controllerunitstr:'ATMEGA64RFR2';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:512;sramsize:8192;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA88';controllerunitstr:'ATMEGA88';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA88A';controllerunitstr:'ATMEGA88A';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA88P';controllerunitstr:'ATMEGA88P';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA88PA';controllerunitstr:'ATMEGA88PA';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA88PB';controllerunitstr:'ATMEGA88PB';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA128';controllerunitstr:'ATMEGA128';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:4096;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA128A';controllerunitstr:'ATMEGA128A';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:4096;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA128RFA1';controllerunitstr:'ATMEGA128RFA1';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:512;sramsize:16384;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA128RFR2';controllerunitstr:'ATMEGA128RFR2';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:512;sramsize:16384;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA162';controllerunitstr:'ATMEGA162';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA164A';controllerunitstr:'ATMEGA164A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA164P';controllerunitstr:'ATMEGA164P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA164PA';controllerunitstr:'ATMEGA164PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA165A';controllerunitstr:'ATMEGA165A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA165P';controllerunitstr:'ATMEGA165P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA165PA';controllerunitstr:'ATMEGA165PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA168';controllerunitstr:'ATMEGA168';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA168A';controllerunitstr:'ATMEGA168A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA168P';controllerunitstr:'ATMEGA168P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA168PA';controllerunitstr:'ATMEGA168PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA168PB';controllerunitstr:'ATMEGA168PB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA169A';controllerunitstr:'ATMEGA169A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA169P';controllerunitstr:'ATMEGA169P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA169PA';controllerunitstr:'ATMEGA169PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA256RFR2';controllerunitstr:'ATMEGA256RFR2';cputype:cpu_avr6;fputype:fpu_soft;flashbase:0;flashsize:262144;srambase:512;sramsize:32768;eeprombase:0;eepromsize:8192)
+        ,(controllertypestr:'ATMEGA324A';controllerunitstr:'ATMEGA324A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA324P';controllerunitstr:'ATMEGA324P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA324PA';controllerunitstr:'ATMEGA324PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA324PB';controllerunitstr:'ATMEGA324PB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA325';controllerunitstr:'ATMEGA325';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA325A';controllerunitstr:'ATMEGA325A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA325P';controllerunitstr:'ATMEGA325P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA325PA';controllerunitstr:'ATMEGA325PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA328';controllerunitstr:'ATMEGA328';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA328P';controllerunitstr:'ATMEGA328P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA328PB';controllerunitstr:'ATMEGA328PB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA329';controllerunitstr:'ATMEGA329';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA329A';controllerunitstr:'ATMEGA329A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA329P';controllerunitstr:'ATMEGA329P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA329PA';controllerunitstr:'ATMEGA329PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA406';controllerunitstr:'ATMEGA406';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:40960;srambase:256;sramsize:2048;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA640';controllerunitstr:'ATMEGA640';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:512;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA644';controllerunitstr:'ATMEGA644';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA644A';controllerunitstr:'ATMEGA644A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA644P';controllerunitstr:'ATMEGA644P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA644PA';controllerunitstr:'ATMEGA644PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA644RFR2';controllerunitstr:'ATMEGA644RFR2';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:512;sramsize:8192;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA645';controllerunitstr:'ATMEGA645';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA645A';controllerunitstr:'ATMEGA645A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA645P';controllerunitstr:'ATMEGA645P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA649';controllerunitstr:'ATMEGA649';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA649A';controllerunitstr:'ATMEGA649A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA649P';controllerunitstr:'ATMEGA649P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA808';controllerunitstr:'ATMEGA808';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15360;sramsize:1024;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA809';controllerunitstr:'ATMEGA809';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15360;sramsize:1024;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA1280';controllerunitstr:'ATMEGA1280';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:512;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA1281';controllerunitstr:'ATMEGA1281';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:512;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA1284';controllerunitstr:'ATMEGA1284';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:16384;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA1284P';controllerunitstr:'ATMEGA1284P';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:16384;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA1284RFR2';controllerunitstr:'ATMEGA1284RFR2';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:512;sramsize:16384;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA1608';controllerunitstr:'ATMEGA1608';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA1609';controllerunitstr:'ATMEGA1609';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA2560';controllerunitstr:'ATMEGA2560';cputype:cpu_avr6;fputype:fpu_soft;flashbase:0;flashsize:262144;srambase:512;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA2561';controllerunitstr:'ATMEGA2561';cputype:cpu_avr6;fputype:fpu_soft;flashbase:0;flashsize:262144;srambase:512;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA2564RFR2';controllerunitstr:'ATMEGA2564RFR2';cputype:cpu_avr6;fputype:fpu_soft;flashbase:0;flashsize:262144;srambase:512;sramsize:32768;eeprombase:0;eepromsize:8192)
+        ,(controllertypestr:'ATMEGA3208';controllerunitstr:'ATMEGA3208';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:12288;sramsize:4096;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA3209';controllerunitstr:'ATMEGA3209';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:12288;sramsize:4096;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA3250';controllerunitstr:'ATMEGA3250';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3250A';controllerunitstr:'ATMEGA3250A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3250P';controllerunitstr:'ATMEGA3250P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3250PA';controllerunitstr:'ATMEGA3250PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3290';controllerunitstr:'ATMEGA3290';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3290A';controllerunitstr:'ATMEGA3290A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3290P';controllerunitstr:'ATMEGA3290P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3290PA';controllerunitstr:'ATMEGA3290PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA4808';controllerunitstr:'ATMEGA4808';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:49152;srambase:10240;sramsize:6144;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA4809';controllerunitstr:'ATMEGA4809';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:49152;srambase:10240;sramsize:6144;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA6450';controllerunitstr:'ATMEGA6450';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA6450A';controllerunitstr:'ATMEGA6450A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA6450P';controllerunitstr:'ATMEGA6450P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA6490';controllerunitstr:'ATMEGA6490';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA6490A';controllerunitstr:'ATMEGA6490A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA6490P';controllerunitstr:'ATMEGA6490P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA8515';controllerunitstr:'ATMEGA8515';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA8535';controllerunitstr:'ATMEGA8535';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY4';controllerunitstr:'ATTINY4';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:512;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY5';controllerunitstr:'ATTINY5';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:512;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY9';controllerunitstr:'ATTINY9';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY10';controllerunitstr:'ATTINY10';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY11';controllerunitstr:'ATTINY11';cputype:cpu_avr1;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:0;sramsize:0;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY12';controllerunitstr:'ATTINY12';cputype:cpu_avr1;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:0;sramsize:0;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY13';controllerunitstr:'ATTINY13';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:96;sramsize:64;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY13A';controllerunitstr:'ATTINY13A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:96;sramsize:64;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY15';controllerunitstr:'ATTINY15';cputype:cpu_avr1;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:0;sramsize:0;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY20';controllerunitstr:'ATTINY20';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:64;sramsize:128;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY24';controllerunitstr:'ATTINY24';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY24A';controllerunitstr:'ATTINY24A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY25';controllerunitstr:'ATTINY25';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY26';controllerunitstr:'ATTINY26';cputype:cpu_avr2;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY28';controllerunitstr:'ATTINY28';cputype:cpu_avr1;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:0;sramsize:0;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY40';controllerunitstr:'ATTINY40';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:64;sramsize:256;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY43U';controllerunitstr:'ATTINY43U';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY44';controllerunitstr:'ATTINY44';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY44A';controllerunitstr:'ATTINY44A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY45';controllerunitstr:'ATTINY45';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY48';controllerunitstr:'ATTINY48';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:256;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY84';controllerunitstr:'ATTINY84';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY84A';controllerunitstr:'ATTINY84A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY85';controllerunitstr:'ATTINY85';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY87';controllerunitstr:'ATTINY87';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY88';controllerunitstr:'ATTINY88';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY102';controllerunitstr:'ATTINY102';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY104';controllerunitstr:'ATTINY104';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY167';controllerunitstr:'ATTINY167';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY202';controllerunitstr:'ATTINY202';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:16256;sramsize:128;eeprombase:5120;eepromsize:64)
+        ,(controllertypestr:'ATTINY204';controllerunitstr:'ATTINY204';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:16256;sramsize:128;eeprombase:5120;eepromsize:64)
+        ,(controllertypestr:'ATTINY212';controllerunitstr:'ATTINY212';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:16256;sramsize:128;eeprombase:5120;eepromsize:64)
+        ,(controllertypestr:'ATTINY214';controllerunitstr:'ATTINY214';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:16256;sramsize:128;eeprombase:5120;eepromsize:64)
+        ,(controllertypestr:'ATTINY261';controllerunitstr:'ATTINY261';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY261A';controllerunitstr:'ATTINY261A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY402';controllerunitstr:'ATTINY402';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY404';controllerunitstr:'ATTINY404';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY406';controllerunitstr:'ATTINY406';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY412';controllerunitstr:'ATTINY412';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY414';controllerunitstr:'ATTINY414';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY416';controllerunitstr:'ATTINY416';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY416AUTO';controllerunitstr:'ATTINY416AUTO';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY417';controllerunitstr:'ATTINY417';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY441';controllerunitstr:'ATTINY441';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY461';controllerunitstr:'ATTINY461';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY461A';controllerunitstr:'ATTINY461A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY804';controllerunitstr:'ATTINY804';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY806';controllerunitstr:'ATTINY806';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY807';controllerunitstr:'ATTINY807';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY814';controllerunitstr:'ATTINY814';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY816';controllerunitstr:'ATTINY816';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY817';controllerunitstr:'ATTINY817';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY828';controllerunitstr:'ATTINY828';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY841';controllerunitstr:'ATTINY841';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY861';controllerunitstr:'ATTINY861';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY861A';controllerunitstr:'ATTINY861A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY1604';controllerunitstr:'ATTINY1604';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:15360;sramsize:1024;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1606';controllerunitstr:'ATTINY1606';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:15360;sramsize:1024;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1607';controllerunitstr:'ATTINY1607';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:15360;sramsize:1024;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1614';controllerunitstr:'ATTINY1614';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1616';controllerunitstr:'ATTINY1616';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1617';controllerunitstr:'ATTINY1617';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1624';controllerunitstr:'ATTINY1624';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1626';controllerunitstr:'ATTINY1626';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1627';controllerunitstr:'ATTINY1627';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1634';controllerunitstr:'ATTINY1634';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY2313';controllerunitstr:'ATTINY2313';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY2313A';controllerunitstr:'ATTINY2313A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY3214';controllerunitstr:'ATTINY3214';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY3216';controllerunitstr:'ATTINY3216';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY3217';controllerunitstr:'ATTINY3217';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY4313';controllerunitstr:'ATTINY4313';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        // Controller board aliases
+        ,(controllertypestr:'ARDUINOLEONARDO'; controllerunitstr:'ATMEGA32U4'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2560; eeprombase:0; eepromsize:1024)
+        ,(controllertypestr:'ARDUINOMEGA'; controllerunitstr:'ATMEGA2560'; cputype: cpu_avr6; fputype:fpu_soft; flashbase:0; flashsize:262144; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
+        ,(controllertypestr:'ARDUINOMICRO'; controllerunitstr:'ATMEGA32U4'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2560; eeprombase:0; eepromsize:1024)
+        ,(controllertypestr:'ARDUINONANO'; controllerunitstr:'ATMEGA328P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
+        ,(controllertypestr:'ARDUINONANOEVERY'; controllerunitstr:'ATMEGA4809'; cputype: cpu_avrxmega3; fputype:fpu_soft; flashbase:0; flashsize:49152; srambase:10240; sramsize:6144; eeprombase:5120; eepromsize:256)
+        ,(controllertypestr:'ARDUINOUNO'; controllerunitstr:'ATMEGA328P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
+        ,(controllertypestr:'ATMEGA256RFR2XPRO';controllerunitstr:'ATMEGA256RFR2';cputype:cpu_avr6;fputype:fpu_soft;flashbase:0;flashsize:262144;srambase:512;sramsize:32768;eeprombase:0;eepromsize:8192)
+        ,(controllertypestr:'ATMEGA324PBXPRO'; controllerunitstr:'ATMEGA324PB'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
+        ,(controllertypestr:'ATMEGA1284PXPLAINED'; controllerunitstr:'ATMEGA1284P'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:16384; eeprombase:0; eepromsize:4096)
+        ,(controllertypestr:'ATMEGA4809XPRO'; controllerunitstr:'ATMEGA4809'; cputype: cpu_avrxmega3; fputype:fpu_soft; flashbase:0; flashsize:49152; srambase:10240; sramsize:6144; eeprombase:5120; eepromsize:256)
+        ,(controllertypestr:'ATTINY817XPRO'; controllerunitstr:'ATTINY817'; cputype: cpu_avrxmega3; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:15872; sramsize:512; eeprombase:5120; eepromsize:128)
+        ,(controllertypestr:'ATTINY3217XPRO'; controllerunitstr:'ATTINY3217'; cputype: cpu_avrxmega3; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:14336; sramsize:2048; eeprombase:5120; eepromsize:256)
    );
 
    { Supported optimizations, only used for information }
@@ -434,12 +588,12 @@ Const
                                  genericlevel3optimizerswitches-
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
-                                 [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,
+                                 [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_loopunroll,cs_opt_tailrecursion,
                                   cs_opt_stackframe,cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
-     [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion];
+     [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_stackframe,cs_opt_tailrecursion];
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
 
@@ -453,22 +607,26 @@ Const
        CPUAVR_HAS_ELPM,
        CPUAVR_HAS_ELPMX,
        CPUAVR_2_BYTE_PC,
-       CPUAVR_3_BYTE_PC
+       CPUAVR_3_BYTE_PC,
+       CPUAVR_16_REGS,
+       CPUAVR_NOMEMMAPPED_REGS
       );
 
  const
    cpu_capabilities : array[tcputype] of set of tcpuflags =
-     ( { cpu_none  } [],
-       { cpu_avr1  } [CPUAVR_2_BYTE_PC],
-       { cpu_avr2  } [CPUAVR_2_BYTE_PC],
-       { cpu_avr25 } [CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_2_BYTE_PC],
-       { cpu_avr3  } [CPUAVR_HAS_JMP_CALL,CPUAVR_2_BYTE_PC],
-       { cpu_avr31 } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_2_BYTE_PC],
-       { cpu_avr35 } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_2_BYTE_PC],
-       { cpu_avr4  } [CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_2_BYTE_PC],
-       { cpu_avr5  } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_2_BYTE_PC],
-       { cpu_avr51 } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_HAS_ELPMX,CPUAVR_2_BYTE_PC],
-       { cpu_avr6  } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_HAS_ELPMX,CPUAVR_3_BYTE_PC]
+     ( { cpu_none      } [],
+       { cpu_avrtiny   } [CPUAVR_16_REGS,CPUAVR_2_BYTE_PC,CPUAVR_NOMEMMAPPED_REGS],
+       { cpu_avr1      } [CPUAVR_2_BYTE_PC],
+       { cpu_avr2      } [CPUAVR_HAS_LPMX,CPUAVR_2_BYTE_PC],
+       { cpu_avr25     } [CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_2_BYTE_PC],
+       { cpu_avr3      } [CPUAVR_HAS_JMP_CALL,CPUAVR_2_BYTE_PC],
+       { cpu_avr31     } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_2_BYTE_PC],
+       { cpu_avr35     } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_2_BYTE_PC],
+       { cpu_avr4      } [CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_2_BYTE_PC],
+       { cpu_avr5      } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_2_BYTE_PC],
+       { cpu_avr51     } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_HAS_ELPMX,CPUAVR_2_BYTE_PC],
+       { cpu_avr6      } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_HAS_ELPMX,CPUAVR_3_BYTE_PC],
+       { cpu_avrxmega3 } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_2_BYTE_PC,CPUAVR_NOMEMMAPPED_REGS]
      );
 
 Implementation

+ 1 - 0
compiler/avr/cpunode.pas

@@ -37,6 +37,7 @@ unit cpunode;
        ,navradd
        ,navrmat
        ,navrcnv
+       ,navrinl
        ,navrmem
        ,navrutil,
        { symtable }

+ 38 - 21
compiler/avr/cpupara.pas

@@ -39,7 +39,7 @@ unit cpupara;
           function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
           function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
-          function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
+          function create_varargs_paraloc_info(p : tabstractprocdef; side: tcallercallee; varargspara:tvarargsparalist):longint;override;
           function  get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
          private
           procedure init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword);
@@ -57,7 +57,10 @@ unit cpupara;
 
     function tcpuparamanager.get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;
       begin
-        result:=VOLATILE_INTREGISTERS;
+        if CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype] then
+          result:=VOLATILE_INTREGISTERS-[RS_R18,RS_R19]
+        else
+          result:=VOLATILE_INTREGISTERS;
       end;
 
 
@@ -167,7 +170,7 @@ unit cpupara;
             result:=not(def.size in [1,2,4]);
           }
           else
-            if (def.size > 8) then
+            if (def.size > 8) or ((CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype]) and (def.size > 4)) then
               result:=true
             else
               result:=inherited ret_in_param(def,pd);
@@ -204,7 +207,8 @@ unit cpupara;
         begin
           { In case of po_delphi_nested_cc, the parent frame pointer
             is always passed on the stack. }
-           if (nextintreg>RS_R9) and
+           if (((nextintreg>RS_R9) and not(CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype])) or
+               (nextintreg>RS_R21)) and
               (not(vo_is_parentfp in hp.varoptions) or
                not(po_delphi_nested_cc in p.procoptions)) then
              begin
@@ -220,7 +224,10 @@ unit cpupara;
                paraloc^.loc:=LOC_REFERENCE;
                paraloc^.reference.index:=NR_STACK_POINTER_REG;
                paraloc^.reference.offset:=stack_offset;
+{$push}
+{$R-}
                dec(stack_offset,2);
+{$pop}
             end;
         end;
 
@@ -300,7 +307,8 @@ unit cpupara;
                    by adding paralen mod 2, make the size even
                  }
                  nextintreg:=curintreg-(paralen+(paralen mod 2))+1;
-                 if nextintreg>=RS_R8 then
+                 if ((nextintreg>=RS_R8) and not(CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype])) or
+                   (nextintreg>=RS_R20) then
                    curintreg:=nextintreg-1
                  else
                    begin
@@ -335,7 +343,8 @@ unit cpupara;
                  case loc of
                     LOC_REGISTER:
                       begin
-                        if nextintreg>=RS_R8 then
+                        if ((nextintreg>=RS_R8) and not(CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype])) or
+                          (nextintreg>=RS_R20) then
                           begin
                             paraloc^.loc:=LOC_REGISTER;
                             paraloc^.size:=OS_8;
@@ -352,19 +361,19 @@ unit cpupara;
                       begin
                         if push_addr_param(hp.varspez,paradef,p.proccalloption) then
                           begin
-                            paraloc^.size:=OS_ADDR;
-                            paraloc^.def:=cpointerdef.getreusable_no_free(paradef);
-                            assignintreg
-                          end
+                           paraloc^.size:=OS_ADDR;
+                           paraloc^.def:=cpointerdef.getreusable_no_free(paradef);
+                           assignintreg;
+                         end
                         else
                           begin
-                             paraloc^.def:=hp.vardef;
-                             paraloc^.loc:=LOC_REFERENCE;
-                             paraloc^.reference.index:=NR_STACK_POINTER_REG;
-                             paraloc^.reference.offset:=stack_offset;
-                             inc(stack_offset,hp.vardef.size);
-                          end;
-                        dec(paralen,hp.vardef.size);
+                            paraloc^.def:=hp.vardef;
+                            paraloc^.loc:=LOC_REFERENCE;
+                            paraloc^.reference.index:=NR_STACK_POINTER_REG;
+                            paraloc^.reference.offset:=stack_offset;
+                            inc(stack_offset,paralen);
+                         end;
+                        paralen:=0;
                       end;
                     else
                       internalerror(2002071002);
@@ -526,17 +535,25 @@ unit cpupara;
       end;
 
 
-    function tcpuparamanager.create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;
+    function tcpuparamanager.create_varargs_paraloc_info(p : tabstractprocdef; side: tcallercallee; varargspara:tvarargsparalist):longint;
       var
         cur_stack_offset: aword;
         curintreg, curfloatreg, curmmreg: tsuperregister;
       begin
         init_values(curintreg,curfloatreg,curmmreg,cur_stack_offset);
 
-        result:=create_paraloc_info_intern(p,callerside,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset);
+        result:=create_paraloc_info_intern(p,side,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset);
         if (p.proccalloption in cstylearrayofconst) then
-          { just continue loading the parameters in the registers }
-          result:=create_paraloc_info_intern(p,callerside,varargspara,curintreg,curfloatreg,curmmreg,cur_stack_offset)
+          begin
+            { just continue loading the parameters in the registers }
+            if assigned(varargspara) then
+              begin
+                if side=callerside then
+                  result:=create_paraloc_info_intern(p,side,varargspara,curintreg,curfloatreg,curmmreg,cur_stack_offset)
+                else
+                  internalerror(2019021914);
+              end;
+          end
         else
           internalerror(200410231);
       end;

+ 3 - 2
compiler/avr/cpupi.pas

@@ -44,7 +44,7 @@ unit cpupi;
   implementation
 
     uses
-       globals,systems,
+       globals,systems,verbose,
        cpubase,
        aasmtai,aasmdata,
        tgobj,
@@ -80,7 +80,8 @@ unit cpupi;
       begin
         { because of the limited branch distance of cond. branches, they must be replaced
           sometimes by normal jmps and an inverse branch }
-        finalizeavrcode(aktproccode);
+        if not(finalizeavrcode(aktproccode)) then
+          message1(cg_w_cannot_compile_subroutine,procdef.fullprocname(false));
       end;
 
 begin

+ 2 - 3
compiler/avr/hlcgcpu.pas

@@ -38,8 +38,6 @@ interface
       procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
     end;
 
-  procedure create_hlcodegen;
-
 implementation
 
   uses
@@ -52,7 +50,7 @@ implementation
     end;
 
 
-  procedure create_hlcodegen;
+  procedure create_hlcodegen_cpu;
     begin
       hlcg:=thlcgcpu.create;
       create_codegen;
@@ -60,4 +58,5 @@ implementation
 
 begin
   chlcgobj:=thlcgcpu;
+  create_hlcodegen:=@create_hlcodegen_cpu;
 end.

+ 37 - 9
compiler/avr/navradd.pas

@@ -137,8 +137,8 @@ interface
           current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,tmpreg1,tmpreg2));
           for i:=2 to tcgsize2size[left.location.size] do
             begin
-              tmpreg1:=GetNextReg(tmpreg1);
-              tmpreg2:=GetNextReg(tmpreg2);
+              tmpreg1:=cg.GetNextReg(tmpreg1);
+              tmpreg2:=cg.GetNextReg(tmpreg2);
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,tmpreg1,tmpreg2));
             end;
         end;
@@ -202,17 +202,45 @@ interface
               hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
           end;
 
+        if (not unsigned) and
+          (right.location.loc=LOC_CONSTANT) and
+          (right.location.value=0) and
+          (getresflags(unsigned) in [F_LT,F_GE]) then
+          begin
+            { This is a simple sign test, where we can just test the msb }
+            tmpreg1:=left.location.register;
+            for i:=2 to tcgsize2size[left.location.size] do
+              begin
+                if i=5 then
+                  tmpreg1:=left.location.registerhi
+                else
+                  tmpreg1:=cg.GetNextReg(tmpreg1);
+              end;
+
+            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,tmpreg1,GetDefaultZeroReg));
+
+            location_reset(location,LOC_FLAGS,OS_NO);
+            location.resflags:=getresflags(unsigned);
+
+            exit;
+          end;
+
         if right.location.loc=LOC_CONSTANT then
           begin
             { decrease register pressure on registers >= r16 }
             if (right.location.value and $ff)=0 then
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,left.location.register,NR_R1))
+              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,left.location.register,GetDefaultZeroReg))
             else
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CPI,left.location.register,right.location.value and $ff))
+              begin
+                cg.getcpuregister(current_asmdata.CurrAsmList,NR_R26);
+                current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_LDI,NR_R26,right.location.value and $ff));
+                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,left.location.register,NR_R26));
+                cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_R26);
+              end;
           end
         { on the left side, we allow only a constant if it is 0 }
         else if (left.location.loc=LOC_CONSTANT) and (left.location.value=0) then
-          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,NR_R1,right.location.register))
+          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,GetDefaultZeroReg,right.location.register))
         else
           current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,left.location.register,right.location.register));
 
@@ -231,15 +259,15 @@ interface
             else
               begin
                 if left.location.loc<>LOC_CONSTANT then
-                  tmpreg1:=GetNextReg(tmpreg1);
+                  tmpreg1:=cg.GetNextReg(tmpreg1);
                 if right.location.loc<>LOC_CONSTANT then
-                  tmpreg2:=GetNextReg(tmpreg2);
+                  tmpreg2:=cg.GetNextReg(tmpreg2);
               end;
             if right.location.loc=LOC_CONSTANT then
               begin
                 { just use R1? }
                 if ((right.location.value64 shr ((i-1)*8)) and $ff)=0 then
-                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,tmpreg1,NR_R1))
+                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,tmpreg1,GetDefaultZeroReg))
                 else
                   begin
                     tmpreg2:=cg.getintregister(current_asmdata.CurrAsmList,OS_8);
@@ -249,7 +277,7 @@ interface
               end
             { above it is checked, if left=0, then a constant is allowed }
             else if (left.location.loc=LOC_CONSTANT) and (left.location.value=0) then
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,NR_R1,tmpreg2))
+              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,GetDefaultZeroReg,tmpreg2))
             else
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,tmpreg1,tmpreg2));
           end;

Some files were not shown because too many files changed in this diff