Переглянути джерело

* synchronized with trunk

git-svn-id: branches/wasm@46214 -
nickysn 5 роки тому
батько
коміт
62cb7c2d12
100 змінених файлів з 10187 додано та 2700 видалено
  1. 317 9
      .gitattributes
  2. 88 0
      .gitignore
  3. 236 40
      Makefile
  4. 25 8
      Makefile.fpc
  5. 387 69
      compiler/Makefile
  6. 45 10
      compiler/Makefile.fpc
  7. 7 3
      compiler/aarch64/a64att.inc
  8. 4 0
      compiler/aarch64/a64atts.inc
  9. 10 2
      compiler/aarch64/a64ins.dat
  10. 7 3
      compiler/aarch64/a64op.inc
  11. 67 2
      compiler/aarch64/a64reg.dat
  12. 123 11
      compiler/aarch64/aasmcpu.pas
  13. 512 10
      compiler/aarch64/agcpugas.pas
  14. 472 40
      compiler/aarch64/aoptcpu.pas
  15. 344 112
      compiler/aarch64/cgcpu.pas
  16. 58 5
      compiler/aarch64/cpubase.pas
  17. 2 2
      compiler/aarch64/cpuinfo.pas
  18. 1 1
      compiler/aarch64/cpunode.pas
  19. 43 34
      compiler/aarch64/cpupara.pas
  20. 81 2
      compiler/aarch64/cpupi.pas
  21. 5 2
      compiler/aarch64/cputarg.pas
  22. 9 0
      compiler/aarch64/ncpuadd.pas
  23. 90 0
      compiler/aarch64/ncpucon.pas
  24. 564 0
      compiler/aarch64/ncpuflw.pas
  25. 94 0
      compiler/aarch64/ncpuinl.pas
  26. 75 9
      compiler/aarch64/ncpumat.pas
  27. 144 16
      compiler/aarch64/ncpuset.pas
  28. 64 0
      compiler/aarch64/ra64con.inc
  29. 64 0
      compiler/aarch64/ra64dwa.inc
  30. 1 1
      compiler/aarch64/ra64nor.inc
  31. 64 0
      compiler/aarch64/ra64num.inc
  32. 201 137
      compiler/aarch64/ra64rni.inc
  33. 200 136
      compiler/aarch64/ra64sri.inc
  34. 64 0
      compiler/aarch64/ra64sta.inc
  35. 64 0
      compiler/aarch64/ra64std.inc
  36. 64 0
      compiler/aarch64/ra64sup.inc
  37. 2 0
      compiler/aarch64/racpu.pas
  38. 235 6
      compiler/aarch64/racpugas.pas
  39. 10 0
      compiler/aarch64/rgcpu.pas
  40. 48 0
      compiler/aarch64/tripletcpu.pas
  41. 22 4
      compiler/aasmbase.pas
  42. 11 10
      compiler/aasmcnst.pas
  43. 1 1
      compiler/aasmdef.pas
  44. 114 26
      compiler/aasmtai.pas
  45. 179 69
      compiler/aggas.pas
  46. 9 3
      compiler/aopt.pas
  47. 62 27
      compiler/aoptbase.pas
  48. 1018 121
      compiler/aoptobj.pas
  49. 2 2
      compiler/aoptutils.pas
  50. 24 7
      compiler/arm/aasmcpu.pas
  51. 32 7
      compiler/arm/agarmgas.pas
  52. 103 589
      compiler/arm/aoptcpu.pas
  53. 75 43
      compiler/arm/cgcpu.pas
  54. 28 5
      compiler/arm/cpubase.pas
  55. 21 5
      compiler/arm/cpuelf.pas
  56. 44 26
      compiler/arm/cpuinfo.pas
  57. 2 1
      compiler/arm/cpunode.pas
  58. 7 7
      compiler/arm/cpupara.pas
  59. 1 1
      compiler/arm/cpupi.pas
  60. 7 0
      compiler/arm/cputarg.pas
  61. 7 65
      compiler/arm/narmadd.pas
  62. 1 1
      compiler/arm/narmcal.pas
  63. 12 25
      compiler/arm/narmcnv.pas
  64. 6 2
      compiler/arm/narmcon.pas
  65. 12 12
      compiler/arm/narminl.pas
  66. 93 16
      compiler/arm/narmld.pas
  67. 9 14
      compiler/arm/narmmat.pas
  68. 332 0
      compiler/arm/narmutil.pas
  69. 51 0
      compiler/arm/tripletcpu.pas
  70. 1027 0
      compiler/armgen/aoptarm.pas
  71. 2 2
      compiler/armgen/armpara.pas
  72. 160 19
      compiler/assemble.pas
  73. 112 28
      compiler/avr/aasmcpu.pas
  74. 3 2
      compiler/avr/agavrgas.pas
  75. 63 24
      compiler/avr/aoptcpu.pas
  76. 8 7
      compiler/avr/ccpuinnr.inc
  77. 354 215
      compiler/avr/cgcpu.pas
  78. 46 7
      compiler/avr/cpubase.pas
  79. 442 284
      compiler/avr/cpuinfo.pas
  80. 22 16
      compiler/avr/cpupara.pas
  81. 3 2
      compiler/avr/cpupi.pas
  82. 4 22
      compiler/avr/itcpugas.pas
  83. 11 6
      compiler/avr/navradd.pas
  84. 47 6
      compiler/avr/navrmat.pas
  85. 6 4
      compiler/avr/rgcpu.pas
  86. 45 0
      compiler/avr/tripletcpu.pas
  87. 3 3
      compiler/blockutl.pas
  88. 3 8
      compiler/browcol.pas
  89. 1 2
      compiler/ccharset.pas
  90. 46 4
      compiler/cclasses.pas
  91. 30 0
      compiler/cepiktimer.pas
  92. 52 13
      compiler/cfidwarf.pas
  93. 10 7
      compiler/cfileutl.pas
  94. 60 5
      compiler/cg64f32.pas
  95. 104 46
      compiler/cgbase.pas
  96. 6 6
      compiler/cgexcept.pas
  97. 315 196
      compiler/cgobj.pas
  98. 1 2
      compiler/comphook.pas
  99. 13 3
      compiler/compiler.pas
  100. 12 2
      compiler/compinnr.pas

Різницю між файлами не показано, бо вона завелика
+ 317 - 9
.gitattributes


+ 88 - 0
.gitignore

@@ -202,8 +202,26 @@ compiler/utils/fpcmade.*
 compiler/utils/fpcmkcfg
 compiler/utils/fpcsubst
 compiler/utils/fppkg
+compiler/utils/gia64reg
+compiler/utils/gppc386
+compiler/utils/mk68kins
+compiler/utils/mk68kreg
+compiler/utils/mka64ins
+compiler/utils/mka64reg
+compiler/utils/mkarmins
+compiler/utils/mkarmreg
+compiler/utils/mkavrreg
+compiler/utils/mkia64reg
+compiler/utils/mkjvmreg
+compiler/utils/mkmpsreg
+compiler/utils/mkppcreg
+compiler/utils/mkspreg
 compiler/utils/mkx86ins
 compiler/utils/mkx86reg
+compiler/utils/mkxtensareg
+compiler/utils/mkz80ins
+compiler/utils/mkz80reg
+compiler/utils/msg2inc
 compiler/utils/ppudump
 compiler/utils/ppufiles
 compiler/utils/ppumove
@@ -555,6 +573,7 @@ packages/chm/*.o
 packages/chm/*.ppu
 packages/chm/*.s
 packages/chm/Package.fpc
+packages/chm/bin
 packages/chm/build-stamp.*
 packages/chm/examples/*.bak
 packages/chm/examples/*.exe
@@ -1778,6 +1797,7 @@ packages/gdbint/*.o
 packages/gdbint/*.ppu
 packages/gdbint/*.s
 packages/gdbint/Package.fpc
+packages/gdbint/bin
 packages/gdbint/build-stamp.*
 packages/gdbint/examples/*.bak
 packages/gdbint/examples/*.exe
@@ -2673,6 +2693,7 @@ packages/ide/*.exe
 packages/ide/*.o
 packages/ide/*.ppu
 packages/ide/*.s
+packages/ide/bin
 packages/ide/compiler/*.bak
 packages/ide/compiler/*.exe
 packages/ide/compiler/*.o
@@ -6154,6 +6175,7 @@ packages/xforms/*.o
 packages/xforms/*.ppu
 packages/xforms/*.s
 packages/xforms/Package.fpc
+packages/xforms/bin
 packages/xforms/build-stamp.*
 packages/xforms/examples/*.bak
 packages/xforms/examples/*.exe
@@ -7652,6 +7674,7 @@ utils/*.o
 utils/*.ppu
 utils/*.s
 utils/Package.fpc
+utils/bin
 utils/bin2obj
 utils/bin2obj.exe
 utils/data2inc
@@ -7704,7 +7727,9 @@ utils/fpcm/*.o
 utils/fpcm/*.ppu
 utils/fpcm/*.s
 utils/fpcm/Package.fpc
+utils/fpcm/bin
 utils/fpcm/fpcmade.*
+utils/fpcm/fpcmake
 utils/fpcm/units
 utils/fpcmade.*
 utils/fpcres/*.bak
@@ -7713,6 +7738,7 @@ utils/fpcres/*.o
 utils/fpcres/*.ppu
 utils/fpcres/*.s
 utils/fpcres/Package.fpc
+utils/fpcres/bin
 utils/fpcres/build-stamp.*
 utils/fpcres/fpcmade.*
 utils/fpcres/units
@@ -7722,6 +7748,7 @@ utils/fpcreslipo/*.o
 utils/fpcreslipo/*.ppu
 utils/fpcreslipo/*.s
 utils/fpcreslipo/Package.fpc
+utils/fpcreslipo/bin
 utils/fpcreslipo/build-stamp.*
 utils/fpcreslipo/fpcmade.*
 utils/fpcreslipo/units
@@ -7731,6 +7758,7 @@ utils/fpdoc/*.o
 utils/fpdoc/*.ppu
 utils/fpdoc/*.s
 utils/fpdoc/Package.fpc
+utils/fpdoc/bin
 utils/fpdoc/fpcmade.*
 utils/fpdoc/fpde/*.bak
 utils/fpdoc/fpde/*.exe
@@ -7757,6 +7785,8 @@ utils/fpdoc/intl/Package.fpc
 utils/fpdoc/intl/fpcmade.*
 utils/fpdoc/intl/units
 utils/fpdoc/units
+utils/fpmake
+utils/fpmake.exe
 utils/fpmc/*.bak
 utils/fpmc/*.exe
 utils/fpmc/*.o
@@ -7771,6 +7801,7 @@ utils/fppkg/*.o
 utils/fppkg/*.ppu
 utils/fppkg/*.s
 utils/fppkg/Package.fpc
+utils/fppkg/bin
 utils/fppkg/build-stamp.*
 utils/fppkg/examples/*.bak
 utils/fppkg/examples/*.exe
@@ -7807,6 +7838,7 @@ utils/fprcp/*.o
 utils/fprcp/*.ppu
 utils/fprcp/*.s
 utils/fprcp/Package.fpc
+utils/fprcp/bin
 utils/fprcp/fpcmade.*
 utils/fprcp/units
 utils/h2pas/*.bak
@@ -7815,8 +7847,36 @@ utils/h2pas/*.o
 utils/h2pas/*.ppu
 utils/h2pas/*.s
 utils/h2pas/Package.fpc
+utils/h2pas/bin
 utils/h2pas/fpcmade.*
 utils/h2pas/units
+utils/ihx2tzx/*.bak
+utils/ihx2tzx/*.exe
+utils/ihx2tzx/*.o
+utils/ihx2tzx/*.ppu
+utils/ihx2tzx/*.s
+utils/ihx2tzx/Package.fpc
+utils/ihx2tzx/bin
+utils/ihx2tzx/fpcmade.*
+utils/ihx2tzx/units
+utils/ihxutil/*.bak
+utils/ihxutil/*.exe
+utils/ihxutil/*.o
+utils/ihxutil/*.ppu
+utils/ihxutil/*.s
+utils/ihxutil/Package.fpc
+utils/ihxutil/bin
+utils/ihxutil/fpcmade.*
+utils/ihxutil/units
+utils/instantfpc/*.bak
+utils/instantfpc/*.exe
+utils/instantfpc/*.o
+utils/instantfpc/*.ppu
+utils/instantfpc/*.s
+utils/instantfpc/Package.fpc
+utils/instantfpc/bin
+utils/instantfpc/fpcmade.*
+utils/instantfpc/units
 utils/mksymbian/*.bak
 utils/mksymbian/*.exe
 utils/mksymbian/*.o
@@ -7826,6 +7886,24 @@ utils/mksymbian/Package.fpc
 utils/mksymbian/build-stamp.*
 utils/mksymbian/fpcmade.*
 utils/mksymbian/units
+utils/pas2jni/*.bak
+utils/pas2jni/*.exe
+utils/pas2jni/*.o
+utils/pas2jni/*.ppu
+utils/pas2jni/*.s
+utils/pas2jni/Package.fpc
+utils/pas2jni/bin
+utils/pas2jni/fpcmade.*
+utils/pas2jni/units
+utils/pas2js/*.bak
+utils/pas2js/*.exe
+utils/pas2js/*.o
+utils/pas2js/*.ppu
+utils/pas2js/*.s
+utils/pas2js/Package.fpc
+utils/pas2js/bin
+utils/pas2js/fpcmade.*
+utils/pas2js/units
 utils/postw32
 utils/postw32.exe
 utils/ppdep
@@ -7867,6 +7945,16 @@ utils/tply/*.o
 utils/tply/*.ppu
 utils/tply/*.s
 utils/tply/Package.fpc
+utils/tply/bin
 utils/tply/fpcmade.*
 utils/tply/units
+utils/unicode/*.bak
+utils/unicode/*.exe
+utils/unicode/*.o
+utils/unicode/*.ppu
+utils/unicode/*.s
+utils/unicode/Package.fpc
+utils/unicode/bin
+utils/unicode/fpcmade.*
+utils/unicode/units
 utils/units

+ 236 - 40
Makefile

@@ -2,7 +2,7 @@
 # Don't edit, this file is generated by FPCMake Version 2.0.0
 #
 default: help
-MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android i386-aros m68k-linux m68k-netbsd m68k-amiga m68k-atari m68k-palmos m68k-macos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-haiku x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded x86_64-iphonesim x86_64-android x86_64-aros x86_64-dragonfly arm-linux arm-netbsd arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android arm-aros powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-embedded mipsel-android jvm-java jvm-android i8086-embedded i8086-msdos i8086-win16 aarch64-linux aarch64-darwin aarch64-android wasm-wasm sparc64-linux riscv32-linux riscv32-embedded riscv64-linux riscv64-embedded
+MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android i386-aros m68k-linux m68k-netbsd m68k-amiga m68k-atari m68k-palmos m68k-macosclassic m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macosclassic powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-haiku x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded x86_64-iphonesim x86_64-android x86_64-aros x86_64-dragonfly arm-linux arm-netbsd arm-palmos arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android arm-aros arm-freertos arm-ios powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-embedded mipsel-android mips64el-linux jvm-java jvm-android i8086-embedded i8086-msdos i8086-win16 aarch64-linux aarch64-darwin aarch64-win64 aarch64-android aarch64-ios wasm-wasm sparc64-linux riscv32-linux riscv32-embedded riscv64-linux riscv64-embedded xtensa-linux xtensa-embedded xtensa-freertos z80-embedded z80-zxspectrum z80-msxdos
 BSDs = freebsd netbsd openbsd darwin dragonfly
 UNIXs = linux $(BSDs) solaris qnx haiku aix
 LIMIT83fs = go32v2 os2 emx watcom msdos win16 atari
@@ -196,6 +196,24 @@ $(error When compiling for mipsel-embedded, a sub-architecture (e.g. SUBARCH=pic
 endif
 override FPCOPT+=-Cp$(SUBARCH)
 endif
+ifeq ($(FULL_TARGET),xtensa-embedded)
+ifeq ($(SUBARCH),)
+$(error When compiling for xtensa-embedded, a sub-architecture (e.g. SUBARCH=lx106 or SUBARCH=lx6) must be defined)
+endif
+override FPCOPT+=-Cp$(SUBARCH)
+endif
+ifeq ($(FULL_TARGET),xtensa-freertos)
+ifeq ($(SUBARCH),)
+$(error When compiling for xtensa-freertos, a sub-architecture (e.g. SUBARCH=lx106 or SUBARCH=lx6) must be defined)
+endif
+override FPCOPT+=-Cp$(SUBARCH)
+endif
+ifeq ($(FULL_TARGET),arm-freertos)
+ifeq ($(SUBARCH),)
+$(error When compiling for arm-freertos, a sub-architecture (e.g. SUBARCH=armv6m or SUBARCH=armv7em) must be defined)
+endif
+override FPCOPT+=-Cp$(SUBARCH)
+endif
 ifneq ($(findstring $(OS_SOURCE),$(LIMIT83fs)),)
 TARGETSUFFIX=$(OS_TARGET)
 SOURCESUFFIX=$(OS_SOURCE)
@@ -269,8 +287,8 @@ endif
 ifndef CROSSBINDIR
 CROSSBINDIR:=$(wildcard $(FPCDIR)/bin/$(TARGETSUFFIX))
 endif
-ifneq ($(findstring $(OS_TARGET),darwin iphonesim),)
-ifeq ($(OS_SOURCE),darwin)
+ifneq ($(findstring $(OS_TARGET),darwin iphonesim ios),)
+ifneq ($(findstring $(OS_SOURCE),darwin ios),)
 DARWIN2DARWIN=1
 endif
 endif
@@ -331,8 +349,8 @@ endif
 endif
 override PACKAGE_NAME=fpc
 override PACKAGE_VERSION=3.3.1
-REQUIREDVERSION=3.0.4
-REQUIREDVERSION2=3.0.2
+REQUIREDVERSION=3.2.0
+REQUIREDVERSION2=3.0.4
 ifndef inOS2
 override FPCDIR:=$(BASEDIR)
 export FPCDIR
@@ -395,8 +413,11 @@ endif
 ifeq ($(CPU_TARGET),riscv64)
 PPSUF=rv64
 endif
-ifeq ($(CPU_TARGET),wasm)
-PPSUF=wasm
+ifeq ($(CPU_TARGET),xtensa)
+PPSUF=xtensa
+endif
+ifeq ($(CPU_TARGET),z80)
+PPSUF=z80
 endif
 ifdef CROSSCOMPILE
 ifneq ($(CPU_TARGET),jvm)
@@ -473,12 +494,17 @@ endif
 ifneq ($(OPT),)
 OPTNEW+=$(OPT)
 endif
+ifneq ($(findstring $(OS_TARGET),linux),)
+ifneq ($(findstring $(CPU_TARGET),i386 arm),)
+override OPTNEW+=-CVglobal-dynamic
+endif
+endif
 CLEANOPTS=FPC=$(PPNEW)
-BUILDOPTS=FPC=$(PPNEW) FPCFPMAKE=$(FPCFPMAKENEW) RELEASE=1 'OPT=$(OPTNEW)'
+BUILDOPTS=FPC=$(PPNEW) FPCFPMAKE=$(FPCFPMAKENEW) RELEASE=1 'OPT=$(OPTNEW)' 'FPCMAKEOPT=$(OPT)'
 INSTALLOPTS=FPC=$(PPNEW) ZIPDESTDIR=$(BASEDIR) FPCMAKE=$(FPCMAKENEW)
 BuildOnlyBaseCPUs=jvm
 ifneq ($(wildcard utils),)
-NOUTILSTARGETS=embedded gba nds msdos win16 macos $(BuildOnlyBaseCPUs)
+NOUTILSTARGETS=embedded gba nds msdos win16 macosclassic $(BuildOnlyBaseCPUs) freertos
 ifeq ($(findstring $(OS_TARGET),$(NOUTILSTARGETS)),)
 ifdef BUILDFULLNATIVE
 UTILS=1
@@ -569,7 +595,7 @@ endif
 ifeq ($(FULL_TARGET),m68k-palmos)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
-ifeq ($(FULL_TARGET),m68k-macos)
+ifeq ($(FULL_TARGET),m68k-macosclassic)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
 ifeq ($(FULL_TARGET),m68k-embedded)
@@ -584,7 +610,7 @@ endif
 ifeq ($(FULL_TARGET),powerpc-amiga)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
-ifeq ($(FULL_TARGET),powerpc-macos)
+ifeq ($(FULL_TARGET),powerpc-macosclassic)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
 ifeq ($(FULL_TARGET),powerpc-darwin)
@@ -662,9 +688,6 @@ endif
 ifeq ($(FULL_TARGET),arm-palmos)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
-ifeq ($(FULL_TARGET),arm-darwin)
-override TARGET_DIRS+=compiler rtl utils packages installer
-endif
 ifeq ($(FULL_TARGET),arm-wince)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
@@ -686,6 +709,12 @@ endif
 ifeq ($(FULL_TARGET),arm-aros)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
+ifeq ($(FULL_TARGET),arm-freertos)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
+ifeq ($(FULL_TARGET),arm-ios)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
 ifeq ($(FULL_TARGET),powerpc64-linux)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
@@ -719,6 +748,9 @@ endif
 ifeq ($(FULL_TARGET),mipsel-android)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
+ifeq ($(FULL_TARGET),mips64el-linux)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
 ifeq ($(FULL_TARGET),jvm-java)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
@@ -740,9 +772,15 @@ endif
 ifeq ($(FULL_TARGET),aarch64-darwin)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
+ifeq ($(FULL_TARGET),aarch64-win64)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
 ifeq ($(FULL_TARGET),aarch64-android)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
+ifeq ($(FULL_TARGET),aarch64-ios)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
 ifeq ($(FULL_TARGET),wasm-wasm)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
@@ -761,6 +799,24 @@ endif
 ifeq ($(FULL_TARGET),riscv64-embedded)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
+ifeq ($(FULL_TARGET),xtensa-linux)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
+ifeq ($(FULL_TARGET),xtensa-embedded)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
+ifeq ($(FULL_TARGET),xtensa-freertos)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
+ifeq ($(FULL_TARGET),z80-embedded)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
+ifeq ($(FULL_TARGET),z80-zxspectrum)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
+ifeq ($(FULL_TARGET),z80-msxdos)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
 override INSTALL_FPCPACKAGE=y
 ifdef REQUIRE_UNITSDIR
 override UNITSDIR+=$(REQUIRE_UNITSDIR)
@@ -958,6 +1014,56 @@ endif
 else
 CROSSBINDIR=
 endif
+ifeq ($(OS_SOURCE),linux)
+ifndef GCCLIBDIR
+ifeq ($(CPU_TARGET),i386)
+ifneq ($(findstring x86_64,$(shell uname -a)),)
+ifeq ($(BINUTILSPREFIX),)
+GCCLIBDIR:=$(shell dirname `gcc -m32 -print-libgcc-file-name`)
+else
+CROSSGCCOPT=-m32
+endif
+endif
+endif
+ifeq ($(CPU_TARGET),powerpc64)
+ifeq ($(BINUTILSPREFIX),)
+GCCLIBDIR:=$(shell dirname `gcc -m64 -print-libgcc-file-name`)
+else
+CROSSGCCOPT=-m64
+endif
+endif
+ifeq ($(CPU_TARGET),sparc)
+ifneq ($(findstring sparc64,$(shell uname -a)),)
+ifeq ($(BINUTILSPREFIX),)
+GCCLIBDIR:=$(shell dirname `gcc -m32 -print-libgcc-file-name`)
+else
+CROSSGCCOPT=-m32
+endif
+endif
+endif
+endif
+ifdef FPCFPMAKE
+FPCFPMAKE_CPU_TARGET=$(shell $(FPCFPMAKE) -iTP)
+ifeq ($(CPU_TARGET),$(FPCFPMAKE_CPU_TARGET))
+FPCMAKEGCCLIBDIR:=$(GCCLIBDIR)
+endif
+endif
+ifndef FPCMAKEGCCLIBDIR
+FPCMAKEGCCLIBDIR:=$(shell dirname `gcc -print-libgcc-file-name`)
+endif
+ifndef GCCLIBDIR
+CROSSGCC=$(strip $(wildcard $(addsuffix /$(BINUTILSPREFIX)gcc$(SRCEXEEXT),$(SEARCHPATH))))
+ifneq ($(CROSSGCC),)
+GCCLIBDIR:=$(shell dirname `$(CROSSGCC) $(CROSSGCCOPT) -print-libgcc-file-name`)
+endif
+endif
+endif
+ifdef inUnix
+ifeq ($(OS_SOURCE),netbsd)
+OTHERLIBDIR:=/usr/pkg/lib
+endif
+export GCCLIBDIR FPCMAKEGCCLIBDIR OTHERLIBDIR
+endif
 BATCHEXT=.bat
 LOADEREXT=.as
 EXEEXT=.exe
@@ -1102,14 +1208,14 @@ STATICLIBPREFIX=
 SHORTSUFFIX=nwl
 IMPORTLIBPREFIX=imp
 endif
-ifeq ($(OS_TARGET),macos)
+ifeq ($(OS_TARGET),macosclassic)
 BATCHEXT=
 EXEEXT=
 DEBUGSYMEXT=.xcoff
 SHORTSUFFIX=mac
 IMPORTLIBPREFIX=imp
 endif
-ifneq ($(findstring $(OS_TARGET),darwin iphonesim),)
+ifneq ($(findstring $(OS_TARGET),darwin iphonesim ios),)
 BATCHEXT=.sh
 EXEEXT=
 HASSHAREDLIB=1
@@ -1159,6 +1265,11 @@ STATICLIBPREFIX=
 STATICLIBEXT=.a
 SHORTSUFFIX=d16
 endif
+ifeq ($(OS_TARGET),msxdos)
+STATICLIBPREFIX=
+STATICLIBEXT=.a
+SHORTSUFFIX=msd
+endif
 ifeq ($(OS_TARGET),embedded)
 ifeq ($(CPU_TARGET),i8086)
 STATICLIBPREFIX=
@@ -1166,6 +1277,9 @@ STATICLIBEXT=.a
 else
 EXEEXT=.bin
 endif
+ifeq ($(CPU_TARGET),z80)
+OEXT=.rel
+endif
 SHORTSUFFIX=emb
 endif
 ifeq ($(OS_TARGET),win16)
@@ -1174,6 +1288,9 @@ STATICLIBEXT=.a
 SHAREDLIBEXT=.dll
 SHORTSUFFIX=w16
 endif
+ifeq ($(OS_TARGET),zxspectrum)
+OEXT=.rel
+endif
 ifneq ($(findstring $(OS_SOURCE),$(LIMIT83fs)),)
 FPCMADE=fpcmade.$(SHORTSUFFIX)
 ZIPSUFFIX=$(SHORTSUFFIX)
@@ -1479,16 +1596,7 @@ override FPCOPT+=-gl
 override FPCOPTDEF+=DEBUG
 endif
 ifdef RELEASE
-ifneq ($(findstring 2.0.,$(FPC_VERSION)),)
-ifeq ($(CPU_TARGET),i386)
-FPCCPUOPT:=-OG2p3
-endif
-ifeq ($(CPU_TARGET),powerpc)
-FPCCPUOPT:=-O1r
-endif
-else
 FPCCPUOPT:=-O2
-endif
 override FPCOPT+=-Ur -Xs $(FPCCPUOPT) -n
 override FPCOPTDEF+=RELEASE
 endif
@@ -1550,6 +1658,17 @@ endif
 endif
 ifdef LINKSHARED
 endif
+ifdef GCCLIBDIR
+override FPCOPT+=-Fl$(GCCLIBDIR)
+ifdef FPCMAKEGCCLIBDIR
+override FPCMAKEOPT+=-Fl$(FPCMAKEGCCLIBDIR)
+else
+override FPCMAKEOPT+=-Fl$(GCCLIBDIR)
+endif
+endif
+ifdef OTHERLIBDIR
+override FPCOPT+=$(addprefix -Fl,$(OTHERLIBDIR))
+endif
 ifdef OPT
 override FPCOPT+=$(OPT)
 endif
@@ -1611,8 +1730,10 @@ else
 override INSTALLPPULINKFILES:=$(subst $(PPUEXT),$(OEXT),$(INSTALLPPUFILES)) $(subst $(PPUEXT),$(LTOEXT),$(INSTALLPPUFILES)) $(addprefix $(STATICLIBPREFIX),$(subst $(PPUEXT),$(STATICLIBEXT),$(INSTALLPPUFILES)))
 endif
 ifneq ($(UNITTARGETDIRPREFIX),)
-override INSTALLPPUFILES:=$(addprefix $(UNITTARGETDIRPREFIX),$(notdir $(INSTALLPPUFILES)))
-override INSTALLPPULINKFILES:=$(wildcard $(addprefix $(UNITTARGETDIRPREFIX),$(notdir $(INSTALLPPULINKFILES))))
+override INSTALLPPUFILENAMES:=$(notdir $(INSTALLPPUFILES))
+override INSTALLPPULINKFILENAMES:=$(notdir $(INSTALLPPULINKFILES))
+override INSTALLPPUFILES=$(addprefix $(UNITTARGETDIRPREFIX),$(INSTALLPPUFILENAMES))
+override INSTALLPPULINKFILES=$(wildcard $(addprefix $(UNITTARGETDIRPREFIX),$(INSTALLPPULINKFILENAMES)))
 endif
 override INSTALL_CREATEPACKAGEFPC=1
 endif
@@ -1771,8 +1892,10 @@ override CLEANPPULINKFILES:=$(subst $(PPUEXT),$(OEXT),$(CLEANPPUFILES)) $(subst
 ifdef DEBUGSYMEXT
 override CLEANPPULINKFILES+=$(subst $(PPUEXT),$(DEBUGSYMEXT),$(CLEANPPUFILES))
 endif
-override CLEANPPUFILES:=$(addprefix $(UNITTARGETDIRPREFIX),$(CLEANPPUFILES))
-override CLEANPPULINKFILES:=$(wildcard $(addprefix $(UNITTARGETDIRPREFIX),$(CLEANPPULINKFILES)))
+override CLEANPPUFILENAMES:=$(CLEANPPUFILES)
+override CLEANPPUFILES=$(addprefix $(UNITTARGETDIRPREFIX),$(CLEANPPUFILENAMES))
+override CLEANPPULINKFILENAMES:=$(CLEANPPULINKFILES)
+override CLEANPPULINKFILES=$(wildcard $(addprefix $(UNITTARGETDIRPREFIX),$(CLEANPPULINKFILENAMES)))
 endif
 fpc_clean: $(CLEANTARGET)
 ifdef CLEANEXEFILES
@@ -2141,7 +2264,7 @@ TARGET_DIRS_UTILS=1
 TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_INSTALLER=1
 endif
-ifeq ($(FULL_TARGET),m68k-macos)
+ifeq ($(FULL_TARGET),m68k-macosclassic)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
 TARGET_DIRS_UTILS=1
@@ -2176,7 +2299,7 @@ TARGET_DIRS_UTILS=1
 TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_INSTALLER=1
 endif
-ifeq ($(FULL_TARGET),powerpc-macos)
+ifeq ($(FULL_TARGET),powerpc-macosclassic)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
 TARGET_DIRS_UTILS=1
@@ -2358,13 +2481,6 @@ TARGET_DIRS_UTILS=1
 TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_INSTALLER=1
 endif
-ifeq ($(FULL_TARGET),arm-darwin)
-TARGET_DIRS_COMPILER=1
-TARGET_DIRS_RTL=1
-TARGET_DIRS_UTILS=1
-TARGET_DIRS_PACKAGES=1
-TARGET_DIRS_INSTALLER=1
-endif
 ifeq ($(FULL_TARGET),arm-wince)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
@@ -2414,6 +2530,20 @@ TARGET_DIRS_UTILS=1
 TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_INSTALLER=1
 endif
+ifeq ($(FULL_TARGET),arm-freertos)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
+ifeq ($(FULL_TARGET),arm-ios)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifeq ($(FULL_TARGET),powerpc64-linux)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
@@ -2491,6 +2621,13 @@ TARGET_DIRS_UTILS=1
 TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_INSTALLER=1
 endif
+ifeq ($(FULL_TARGET),mips64el-linux)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifeq ($(FULL_TARGET),jvm-java)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
@@ -2540,6 +2677,13 @@ TARGET_DIRS_UTILS=1
 TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_INSTALLER=1
 endif
+ifeq ($(FULL_TARGET),aarch64-win64)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifeq ($(FULL_TARGET),aarch64-android)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
@@ -2547,6 +2691,13 @@ TARGET_DIRS_UTILS=1
 TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_INSTALLER=1
 endif
+ifeq ($(FULL_TARGET),aarch64-ios)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifeq ($(FULL_TARGET),wasm-wasm)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
@@ -2589,6 +2740,48 @@ TARGET_DIRS_UTILS=1
 TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_INSTALLER=1
 endif
+ifeq ($(FULL_TARGET),xtensa-linux)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
+ifeq ($(FULL_TARGET),xtensa-embedded)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
+ifeq ($(FULL_TARGET),xtensa-freertos)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
+ifeq ($(FULL_TARGET),z80-embedded)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
+ifeq ($(FULL_TARGET),z80-zxspectrum)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
+ifeq ($(FULL_TARGET),z80-msxdos)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifdef TARGET_DIRS_COMPILER
 compiler_all:
 	$(MAKE) -C compiler all
@@ -2833,7 +3026,6 @@ makefiles: fpc_makefiles
 ifneq ($(wildcard fpcmake.loc),)
 include fpcmake.loc
 endif
-.NOTPARALLEL:
 unexport FPC_VERSION FPC_COMPILERINFO OS_SOURCE
 override TARGET_DIRS:=$(wildcard $(TARGET_DIRS))
 .PHONY: help
@@ -2929,7 +3121,11 @@ ifeq ($(findstring $(CPU_TARGET), $(BuildOnlyBaseCPUs)),)
 endif
 singlezipinstall: zipinstallall
 zipinstallall: $(BUILDSTAMP)
+ifeq ($(FULL_SOURCE),$(FULL_TARGET))
 	$(MAKE) fpc_zipinstall ZIPTARGET=install FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(TARGETSUFFIX) $(INSTALLOPTS)
+else
+	$(MAKE) fpc_zipinstall ZIPTARGET=install FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(TARGETSUFFIX).built.on.$(SOURCESUFFIX) $(INSTALLOPTS)
+endif
 .PHONY: crossall crossinstall crosszipinstall crosssinglezipinstall
 crossall:
 	$(MAKE) all CROSSINSTALL=1
@@ -2938,4 +3134,4 @@ crossinstall:
 crosszipinstall:
 	$(MAKE) zipinstall CROSSINSTALL=1
 crosssinglezipinstall:
-	$(MAKE) fpc_zipinstall ZIPTARGET=crossinstall ZIPNAME=fpc $(INSTALLOPTS)
+	$(MAKE) fpc_zipinstall ZIPTARGET=crossinstall FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(SOURCESUFFIX).cross.$(TARGETSUFFIX) $(INSTALLOPTS)

+ 25 - 8
Makefile.fpc

@@ -20,8 +20,8 @@ fpcdir=.
 rule=help
 
 [prerules]
-REQUIREDVERSION=3.0.4
-REQUIREDVERSION2=3.0.2
+REQUIREDVERSION=3.2.0
+REQUIREDVERSION2=3.0.4
 
 
 # make versions < 3.77 (OS2 version) are buggy
@@ -91,6 +91,12 @@ endif
 ifeq ($(CPU_TARGET),riscv64)
 PPSUF=rv64
 endif
+ifeq ($(CPU_TARGET),xtensa)
+PPSUF=xtensa
+endif
+ifeq ($(CPU_TARGET),z80)
+PPSUF=z80
+endif
 ifeq ($(CPU_TARGET),wasm)
 PPSUF=wasm
 endif
@@ -200,15 +206,24 @@ endif
 ifneq ($(OPT),)
 OPTNEW+=$(OPT)
 endif
+
+# some targets do not generate PIC by default, so we have to select explicitly
+# the general threading model when compiling the final versions of rtl and packages
+ifneq ($(findstring $(OS_TARGET),linux),)
+ifneq ($(findstring $(CPU_TARGET),i386 arm),)
+override OPTNEW+=-CVglobal-dynamic
+endif
+endif
+
 CLEANOPTS=FPC=$(PPNEW)
-BUILDOPTS=FPC=$(PPNEW) FPCFPMAKE=$(FPCFPMAKENEW) RELEASE=1 'OPT=$(OPTNEW)'
+BUILDOPTS=FPC=$(PPNEW) FPCFPMAKE=$(FPCFPMAKENEW) RELEASE=1 'OPT=$(OPTNEW)' 'FPCMAKEOPT=$(OPT)'
 INSTALLOPTS=FPC=$(PPNEW) ZIPDESTDIR=$(BASEDIR) FPCMAKE=$(FPCMAKENEW)
 
 # CPU targets for which we only build the compiler/rtl
 BuildOnlyBaseCPUs=jvm
 
 ifneq ($(wildcard utils),)
-NOUTILSTARGETS=embedded gba nds msdos win16 macos $(BuildOnlyBaseCPUs)
+NOUTILSTARGETS=embedded gba nds msdos win16 macosclassic $(BuildOnlyBaseCPUs) freertos
 ifeq ($(findstring $(OS_TARGET),$(NOUTILSTARGETS)),)
 ifdef BUILDFULLNATIVE
 UTILS=1
@@ -217,8 +232,6 @@ endif
 endif
 
 [rules]
-.NOTPARALLEL:
-
 # These values can change
 unexport FPC_VERSION FPC_COMPILERINFO OS_SOURCE
 
@@ -366,7 +379,11 @@ endif
 
 singlezipinstall: zipinstallall
 zipinstallall: $(BUILDSTAMP)
-        $(MAKE) fpc_zipinstall ZIPTARGET=install FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(TARGETSUFFIX) $(INSTALLOPTS)
+ifeq ($(FULL_SOURCE),$(FULL_TARGET))
+	$(MAKE) fpc_zipinstall ZIPTARGET=install FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(TARGETSUFFIX) $(INSTALLOPTS)
+else
+	$(MAKE) fpc_zipinstall ZIPTARGET=install FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(TARGETSUFFIX).built.on.$(SOURCESUFFIX) $(INSTALLOPTS)
+endif
 
 
 ##########################################################################
@@ -385,4 +402,4 @@ crosszipinstall:
         $(MAKE) zipinstall CROSSINSTALL=1
 
 crosssinglezipinstall:
-        $(MAKE) fpc_zipinstall ZIPTARGET=crossinstall ZIPNAME=fpc $(INSTALLOPTS)
+	$(MAKE) fpc_zipinstall ZIPTARGET=crossinstall FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(SOURCESUFFIX).cross.$(TARGETSUFFIX) $(INSTALLOPTS)

Різницю між файлами не показано, бо вона завелика
+ 387 - 69
compiler/Makefile


+ 45 - 10
compiler/Makefile.fpc

@@ -32,7 +32,7 @@ fpcdir=..
 unexport FPC_VERSION FPC_COMPILERINFO
 
 # Which platforms are ready for inclusion in the cycle
-CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086 aarch64 sparc64 riscv32 riscv64 wasm
+CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086 aarch64 sparc64 riscv32 riscv64 xtensa z80 wasm
 
 # All supported targets used for clean
 ALLTARGETS=$(CYCLETARGETS)
@@ -89,6 +89,12 @@ endif
 ifdef RISCV64
 PPC_TARGET=riscv64
 endif
+ifdef XTENSA
+PPC_TARGET=xtensa
+endif
+ifdef Z80
+PPC_TARGET=z80
+endif
 
 # Default is to generate a compiler for the same
 # platform as CPU_TARGET (a native compiler)
@@ -180,7 +186,6 @@ CPUSUF=386
 endif
 ifeq ($(CPC_TARGET),m68k)
 CPUSUF=68k
-ALLOW_WARNINGS=1
 endif
 ifeq ($(CPC_TARGET),powerpc)
 CPUSUF=ppc
@@ -225,6 +230,12 @@ endif
 ifeq ($(CPC_TARGET),riscv64)
 CPUSUF=rv64
 endif
+ifeq ($(CPC_TARGET),xtensa)
+CPUSUF=xtensa
+endif
+ifeq ($(CPC_TARGET),z80)
+CPUSUF=z80
+endif
 ifeq ($(CPC_TARGET),wasm)
 CPUSUF=wasm
 endif
@@ -286,12 +297,12 @@ endif
 
 # i386 specific
 ifeq ($(PPC_TARGET),i386)
-override LOCALOPT+=-Fux86
+override LOCALOPT+=-Fux86 -Fix86
 endif
 
 # x86_64 specific
 ifeq ($(PPC_TARGET),x86_64)
-override LOCALOPT+=-Fux86
+override LOCALOPT+=-Fux86 -Fix86
 endif
 
 # PowerPC specific
@@ -397,7 +408,13 @@ endif
 ifeq ($(OS_TARGET),win16)
 NoNativeBinaries=1
 endif
-ifeq ($(OS_TARGET),macos)
+ifeq ($(OS_TARGET),macosclassic)
+NoNativeBinaries=1
+endif
+ifeq ($(OS_TARGET),freertos)
+NoNativeBinaries=1
+endif
+ifeq ($(OS_TARGET),zxspectrum)
 NoNativeBinaries=1
 endif
 
@@ -594,8 +611,8 @@ endif
 # cpu targets
 #####################################################################
 
-PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 mips mipsel avr jvm i8086 aarch64 sparc64 riscv32 riscv64 wasm
-PPC_SUFFIXES=386 68k ppc sparc arm armeb x64 ppc64 mips mipsel avr jvm 8086 a64 sparc64 rv32 rv64 wasm
+PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 mips mipsel avr jvm i8086 aarch64 sparc64 riscv32 riscv64 xtensa z80 wasm
+PPC_SUFFIXES=386 68k ppc sparc arm armeb x64 ppc64 mips mipsel avr jvm 8086 a64 sparc64 rv32 rv64 xtensa z80 wasm
 INSTALL_TARGETS=$(addsuffix _exe_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 SYMLINKINSTALL_TARGETS=$(addsuffix _symlink_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 
@@ -702,7 +719,11 @@ insdataarch64 : aarch64/a64ins.dat
 	    $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mka64ins.pp
         cd aarch64 && ..$(PATHSEP)utils$(PATHSEP)mka64ins$(SRCEXEEXT)
 
-insdat: insdatx86 insdatarm insdataarch64
+insdatz80 : z80/z80ins.dat
+	$(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkz80ins.pp
+        cd z80 && ..$(PATHSEP)utils$(PATHSEP)mkz80ins$(SRCEXEEXT)
+
+insdat: insdatx86 insdatarm insdataarch64 insdatz80
 
 regdatx86 : x86/x86reg.dat
 	$(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkx86reg.pp
@@ -739,11 +760,25 @@ regdatmips : mips/mipsreg.dat
 	    $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkmpsreg.pp
         cd mips && ..$(PATHSEP)utils$(PATHSEP)mkmpsreg$(SRCEXEEXT)
 
+regdatz80 : z80/z80reg.dat
+            $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkz80reg.pp
+        cd z80 && ..$(PATHSEP)utils$(PATHSEP)mkz80reg$(SRCEXEEXT)
+
 regdatwasm : wasm/wasmreg.dat
 	    $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkwasmreg.pp
         cd wasm && ..$(PATHSEP)utils$(PATHSEP)mkwasmreg$(SRCEXEEXT)
 
-regdat : regdatx86 regdatarm regdatsp regdatavr regdataarch64 regdatmips regdatsp64 regdatwasm
+regdat : regdatx86 regdatarm regdatsp regdatavr regdataarch64 regdatmips regdatsp64 regdatz80 regdatwasm
+
+intrdatx86 : x86/x86intr.dat
+		$(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkx86inl.pp
+        cd x86 && ..$(PATHSEP)utils$(PATHSEP)mkx86inl$(SRCEXEEXT)
+        cp -f x86/cpummprocs.inc ../rtl/x86_64
+		cp -f x86/cpumminnr.inc ../rtl/x86_64
+        cp -f x86/cpummprocs.inc ../rtl/i386
+		cp -f x86/cpumminnr.inc ../rtl/i386
+        
+intrdat : intrdatx86
 
 # revision.inc rule
 revision.inc :
@@ -997,7 +1032,7 @@ ifeq ($(OS_SOURCE),win64)
   EXCLUDE_80BIT_TARGETS=1
 endif
 
-ifneq ($(findstring $(CPU_SOURCE),aarch64 arm avr jvm m68k mips mipsel powerpc powerpc64 sparc sparc64 riscv32 riscv64),)
+ifneq ($(findstring $(CPU_SOURCE),aarch64 arm avr jvm m68k mips mipsel powerpc powerpc64 sparc sparc64 riscv32 riscv64 xtensa),)
   EXCLUDE_80BIT_TARGETS=1
 endif
 

+ 7 - 3
compiler/aarch64/a64att.inc

@@ -128,8 +128,11 @@
 'lsl',
 'lsr',
 'ror',
-'sxt',
-'uxt',
+'sxtb',
+'sxth',
+'sxtw',
+'uxtb',
+'uxth',
 'neg',
 'ngc',
 'mvn',
@@ -187,5 +190,6 @@
 'fcsel',
 'umov',
 'ins',
-'movi'
+'movi',
+'veor'
 );

+ 4 - 0
compiler/aarch64/a64atts.inc

@@ -187,5 +187,9 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 );

+ 10 - 2
compiler/aarch64/a64ins.dat

@@ -257,9 +257,15 @@
 
 [ROR]
 
-[SXT]
+[SXTB]
 
-[UXT]
+[SXTH]
+
+[SXTW]
+
+[UXTB]
+
+[UXTH]
 
 [NEG]
 
@@ -376,3 +382,5 @@
 [INS]
 
 [MOVI]
+
+[VEOR]

+ 7 - 3
compiler/aarch64/a64op.inc

@@ -128,8 +128,11 @@ A_ASR,
 A_LSL,
 A_LSR,
 A_ROR,
-A_SXT,
-A_UXT,
+A_SXTB,
+A_SXTH,
+A_SXTW,
+A_UXTB,
+A_UXTH,
 A_NEG,
 A_NGC,
 A_MVN,
@@ -187,5 +190,6 @@ A_FCMMPE,
 A_FCSEL,
 A_UMOV,
 A_INS,
-A_MOVI
+A_MOVI,
+A_VEOR
 );

+ 67 - 2
compiler/aarch64/a64reg.dat

@@ -76,170 +76,235 @@ XZR,$01,$05,$1F,xzr,31,31
 WSP,$01,$04,$20,wsp,31,31
 SP,$01,$05,$20,sp,31,31
 
-
 ; vfp registers
+; generated by fpc/compiler/utils/gena64vfp.pp to avoid tedious typing
 B0,$04,$01,$00,b0,64,64
 H0,$04,$03,$00,h0,64,64
 S0,$04,$09,$00,s0,64,64
 D0,$04,$0a,$00,d0,64,64
 Q0,$04,$05,$00,q0,64,64
+V08B,$04,$17,$00,v0.8b,64,64
+V016B,$04,$18,$00,v0.16b,64,64
 B1,$04,$01,$01,b1,65,65
 H1,$04,$03,$01,h1,65,65
 S1,$04,$09,$01,s1,65,65
 D1,$04,$0a,$01,d1,65,65
 Q1,$04,$05,$01,q1,65,65
+V18B,$04,$17,$01,v1.8b,65,65
+V116B,$04,$18,$01,v1.16b,65,65
 B2,$04,$01,$02,b2,66,66
 H2,$04,$03,$02,h2,66,66
 S2,$04,$09,$02,s2,66,66
 D2,$04,$0a,$02,d2,66,66
 Q2,$04,$05,$02,q2,66,66
+V28B,$04,$17,$02,v2.8b,66,66
+V216B,$04,$18,$02,v2.16b,66,66
 B3,$04,$01,$03,b3,67,67
 H3,$04,$03,$03,h3,67,67
 S3,$04,$09,$03,s3,67,67
 D3,$04,$0a,$03,d3,67,67
 Q3,$04,$05,$03,q3,67,67
+V38B,$04,$17,$03,v3.8b,67,67
+V316B,$04,$18,$03,v3.16b,67,67
 B4,$04,$01,$04,b4,68,68
 H4,$04,$03,$04,h4,68,68
 S4,$04,$09,$04,s4,68,68
 D4,$04,$0a,$04,d4,68,68
 Q4,$04,$05,$04,q4,68,68
+V48B,$04,$17,$04,v4.8b,68,68
+V416B,$04,$18,$04,v4.16b,68,68
 B5,$04,$01,$05,b5,69,69
 H5,$04,$03,$05,h5,69,69
 S5,$04,$09,$05,s5,69,69
 D5,$04,$0a,$05,d5,69,69
 Q5,$04,$05,$05,q5,69,69
+V58B,$04,$17,$05,v5.8b,69,69
+V516B,$04,$18,$05,v5.16b,69,69
 B6,$04,$01,$06,b6,70,70
 H6,$04,$03,$06,h6,70,70
-S6,$04,$09,$06,s6,70,70
+S6,$04,$09,$06,s6,70,70
 D6,$04,$0a,$06,d6,70,70
 Q6,$04,$05,$06,q6,70,70
+V68B,$04,$17,$06,v6.8b,70,70
+V616B,$04,$18,$06,v6.16b,70,70
 B7,$04,$01,$07,b7,71,71
 H7,$04,$03,$07,h7,71,71
 S7,$04,$09,$07,s7,71,71
 D7,$04,$0a,$07,d7,71,71
 Q7,$04,$05,$07,q7,71,71
+V78B,$04,$17,$07,v7.8b,71,71
+V716B,$04,$18,$07,v7.16b,71,71
 B8,$04,$01,$08,b8,72,72
 H8,$04,$03,$08,h8,72,72
 S8,$04,$09,$08,s8,72,72
 D8,$04,$0a,$08,d8,72,72
 Q8,$04,$05,$08,q8,72,72
+V88B,$04,$17,$08,v8.8b,72,72
+V816B,$04,$18,$08,v8.16b,72,72
 B9,$04,$01,$09,b9,73,73
 H9,$04,$03,$09,h9,73,73
 S9,$04,$09,$09,s9,73,73
 D9,$04,$0a,$09,d9,73,73
 Q9,$04,$05,$09,q9,73,73
+V98B,$04,$17,$09,v9.8b,73,73
+V916B,$04,$18,$09,v9.16b,73,73
 B10,$04,$01,$0A,b10,74,74
 H10,$04,$03,$0A,h10,74,74
 S10,$04,$09,$0A,s10,74,74
 D10,$04,$0a,$0A,d10,74,74
 Q10,$04,$05,$0A,q10,74,74
+V108B,$04,$17,$0A,v10.8b,74,74
+V1016B,$04,$18,$0A,v10.16b,74,74
 B11,$04,$01,$0B,b11,75,75
 H11,$04,$03,$0B,h11,75,75
 S11,$04,$09,$0B,s11,75,75
 D11,$04,$0a,$0B,d11,75,75
 Q11,$04,$05,$0B,q11,75,75
+V118B,$04,$17,$0B,v11.8b,75,75
+V1116B,$04,$18,$0B,v11.16b,75,75
 B12,$04,$01,$0C,b12,76,76
 H12,$04,$03,$0C,h12,76,76
 S12,$04,$09,$0C,s12,76,76
 D12,$04,$0a,$0C,d12,76,76
 Q12,$04,$05,$0C,q12,76,76
+V128B,$04,$17,$0C,v12.8b,76,76
+V1216B,$04,$18,$0C,v12.16b,76,76
 B13,$04,$01,$0D,b13,77,77
 H13,$04,$03,$0D,h13,77,77
 S13,$04,$09,$0D,s13,77,77
 D13,$04,$0a,$0D,d13,77,77
 Q13,$04,$05,$0D,q13,77,77
+V138B,$04,$17,$0D,v13.8b,77,77
+V1316B,$04,$18,$0D,v13.16b,77,77
 B14,$04,$01,$0E,b14,78,78
 H14,$04,$03,$0E,h14,78,78
 S14,$04,$09,$0E,s14,78,78
 D14,$04,$0a,$0E,d14,78,78
 Q14,$04,$05,$0E,q14,78,78
+V148B,$04,$17,$0E,v14.8b,78,78
+V1416B,$04,$18,$0E,v14.16b,78,78
 B15,$04,$01,$0F,b15,79,79
 H15,$04,$03,$0F,h15,79,79
 S15,$04,$09,$0F,s15,79,79
 D15,$04,$0a,$0F,d15,79,79
 Q15,$04,$05,$0F,q15,79,79
+V158B,$04,$17,$0F,v15.8b,79,79
+V1516B,$04,$18,$0F,v15.16b,79,79
 B16,$04,$01,$10,b16,80,80
 H16,$04,$03,$10,h16,80,80
 S16,$04,$09,$10,s16,80,80
 D16,$04,$0a,$10,d16,80,80
 Q16,$04,$05,$10,q16,80,80
+V168B,$04,$17,$10,v16.8b,80,80
+V1616B,$04,$18,$10,v16.16b,80,80
 B17,$04,$01,$11,b17,81,81
 H17,$04,$03,$11,h17,81,81
 S17,$04,$09,$11,s17,81,81
 D17,$04,$0a,$11,d17,81,81
 Q17,$04,$05,$11,q17,81,81
+V178B,$04,$17,$11,v17.8b,81,81
+V1716B,$04,$18,$11,v17.16b,81,81
 B18,$04,$01,$12,b18,82,82
 H18,$04,$03,$12,h18,82,82
 S18,$04,$09,$12,s18,82,82
 D18,$04,$0a,$12,d18,82,82
 Q18,$04,$05,$12,q18,82,82
+V188B,$04,$17,$12,v18.8b,82,82
+V1816B,$04,$18,$12,v18.16b,82,82
 B19,$04,$01,$13,b19,83,83
 H19,$04,$03,$13,h19,83,83
 S19,$04,$09,$13,s19,83,83
 D19,$04,$0a,$13,d19,83,83
 Q19,$04,$05,$13,q19,83,83
+V198B,$04,$17,$13,v19.8b,83,83
+V1916B,$04,$18,$13,v19.16b,83,83
 B20,$04,$01,$14,b20,84,84
 H20,$04,$03,$14,h20,84,84
 S20,$04,$09,$14,s20,84,84
 D20,$04,$0a,$14,d20,84,84
 Q20,$04,$05,$14,q20,84,84
+V208B,$04,$17,$14,v20.8b,84,84
+V2016B,$04,$18,$14,v20.16b,84,84
 B21,$04,$01,$15,b21,85,85
 H21,$04,$03,$15,h21,85,85
 S21,$04,$09,$15,s21,85,85
 D21,$04,$0a,$15,d21,85,85
 Q21,$04,$05,$15,q21,85,85
+V218B,$04,$17,$15,v21.8b,85,85
+V2116B,$04,$18,$15,v21.16b,85,85
 B22,$04,$01,$16,b22,86,86
 H22,$04,$03,$16,h22,86,86
 S22,$04,$09,$16,s22,86,86
 D22,$04,$0a,$16,d22,86,86
 Q22,$04,$05,$16,q22,86,86
+V228B,$04,$17,$16,v22.8b,86,86
+V2216B,$04,$18,$16,v22.16b,86,86
 B23,$04,$01,$17,b23,87,87
 H23,$04,$03,$17,h23,87,87
 S23,$04,$09,$17,s23,87,87
 D23,$04,$0a,$17,d23,87,87
 Q23,$04,$05,$17,q23,87,87
+V238B,$04,$17,$17,v23.8b,87,87
+V2316B,$04,$18,$17,v23.16b,87,87
 B24,$04,$01,$18,b24,88,88
 H24,$04,$03,$18,h24,88,88
 S24,$04,$09,$18,s24,88,88
 D24,$04,$0a,$18,d24,88,88
 Q24,$04,$05,$18,q24,88,88
+V248B,$04,$17,$18,v24.8b,88,88
+V2416B,$04,$18,$18,v24.16b,88,88
 B25,$04,$01,$19,b25,89,89
 H25,$04,$03,$19,h25,89,89
 S25,$04,$09,$19,s25,89,89
 D25,$04,$0a,$19,d25,89,89
 Q25,$04,$05,$19,q25,89,89
+V258B,$04,$17,$19,v25.8b,89,89
+V2516B,$04,$18,$19,v25.16b,89,89
 B26,$04,$01,$1A,b26,90,90
 H26,$04,$03,$1A,h26,90,90
 S26,$04,$09,$1A,s26,90,90
 D26,$04,$0a,$1A,d26,90,90
 Q26,$04,$05,$1A,q26,90,90
+V268B,$04,$17,$1A,v26.8b,90,90
+V2616B,$04,$18,$1A,v26.16b,90,90
 B27,$04,$01,$1B,b27,91,91
 H27,$04,$03,$1B,h27,91,91
 S27,$04,$09,$1B,s27,91,91
 D27,$04,$0a,$1B,d27,91,91
 Q27,$04,$05,$1B,q27,91,91
+V278B,$04,$17,$1B,v27.8b,91,91
+V2716B,$04,$18,$1B,v27.16b,91,91
 B28,$04,$01,$1C,b28,92,92
 H28,$04,$03,$1C,h28,92,92
 S28,$04,$09,$1C,s28,92,92
 D28,$04,$0a,$1C,d28,92,92
 Q28,$04,$05,$1C,q28,92,92
+V288B,$04,$17,$1C,v28.8b,92,92
+V2816B,$04,$18,$1C,v28.16b,92,92
 B29,$04,$01,$1D,b29,93,93
 H29,$04,$03,$1D,h29,93,93
 S29,$04,$09,$1D,s29,93,93
 D29,$04,$0a,$1D,d29,93,93
 Q29,$04,$05,$1D,q29,93,93
+V298B,$04,$17,$1D,v29.8b,93,93
+V2916B,$04,$18,$1D,v29.16b,93,93
 B30,$04,$01,$1E,b30,94,94
 H30,$04,$03,$1E,h30,94,94
 S30,$04,$09,$1E,s30,94,94
 D30,$04,$0a,$1E,d30,94,94
 Q30,$04,$05,$1E,q30,94,94
+V308B,$04,$17,$1E,v30.8b,94,94
+V3016B,$04,$18,$1E,v30.16b,94,94
 B31,$04,$01,$1F,b31,95,95
 H31,$04,$03,$1F,h31,95,95
 S31,$04,$09,$1F,s31,95,95
 D31,$04,$0a,$1F,d31,95,95
 Q31,$04,$05,$1F,q31,95,95
+V318B,$04,$17,$1F,v31.8b,95,95
+V3116B,$04,$18,$1F,v31.16b,95,95
 
 NZCV,$05,$00,$00,nzcv,0,0
 FPCR,$05,$00,$01,fpcr,0,0
 FPSR,$05,$00,$02,fpsr,0,0
 TPIDR_EL0,$05,$00,$03,tpidr_el0,0,0
+

+ 123 - 11
compiler/aarch64/aasmcpu.pas

@@ -157,6 +157,8 @@ uses
          oppostfix : TOpPostfix;
          procedure loadshifterop(opidx:longint;const so:tshifterop);
          procedure loadconditioncode(opidx: longint; const c: tasmcond);
+         procedure loadrealconst(opidx: longint; const _value: bestreal);
+
          constructor op_none(op : tasmop);
 
          constructor op_reg(op : tasmop;_op1 : tregister);
@@ -168,6 +170,7 @@ uses
          constructor op_reg_cond(op: tasmop; _op1: tregister; _op2: tasmcond);
          constructor op_reg_const(op:tasmop; _op1: tregister; _op2: aint);
          constructor op_reg_const_shifterop(op : tasmop;_op1: tregister; _op2: aint;_op3 : tshifterop);
+         constructor op_reg_realconst(op: tasmop; _op1: tregister; _op2: bestreal);
 
          constructor op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
          constructor op_reg_reg_reg_reg(op : tasmop;_op1,_op2,_op3,_op4 : tregister);
@@ -180,7 +183,6 @@ uses
          constructor op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister; const _op4 : tshifterop);
          constructor op_reg_reg_reg_cond(op : tasmop;_op1,_op2,_op3 : tregister; const _op4: tasmcond);
 
-
          { this is for Jmp instructions }
          constructor op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
 
@@ -280,6 +282,19 @@ implementation
       end;
 
 
+    { Stores the floating point constant _value in operand slot opidx,
+      turning that slot into a top_realconst operand. }
+    procedure taicpu.loadrealconst(opidx:longint;const _value:bestreal);
+      begin
+        { make sure the operand slot exists before touching it }
+        allocate_oper(opidx+1);
+        { an operand of another kind is cleared first }
+        if oper[opidx]^.typ<>top_realconst then
+          clearop(opidx);
+        oper[opidx]^.val_real:=_value;
+        oper[opidx]^.typ:=top_realconst;
+      end;
+
+
 {*****************************************************************************
                                  taicpu Constructors
 *****************************************************************************}
@@ -382,6 +397,15 @@ implementation
       end;
 
 
+    { Creates a two-operand instruction op whose first operand is the
+      register _op1 and whose second operand is the real constant _op2. }
+    constructor taicpu.op_reg_realconst(op : tasmop; _op1 : tregister; _op2 : bestreal);
+      begin
+         inherited create(op);
+         { two operands: slot 0 = register, slot 1 = real constant }
+         ops:=2;
+         loadreg(0,_op1);
+         loadrealconst(1,_op2);
+      end;
+
+
      constructor taicpu.op_reg_reg_const(op : tasmop;_op1,_op2 : tregister; _op3: aint);
        begin
          inherited create(op);
@@ -528,7 +552,7 @@ implementation
       const
         { invalid sizes for aarch64 are 0 }
         subreg2bytesize: array[TSubRegister] of byte =
-          (0,0,0,0,4,8,0,0,0,4,8,0,0,0,0,0,0,0,0,0,0,0,0);
+          (0,0,0,0,4,8,0,0,0,4,8,0,0,0,0,0,0,0,0,0,0,0,0,8,16,0);
       var
         scalefactor: byte;
       begin
@@ -554,22 +578,40 @@ implementation
       begin
         result:=sr_complex;
         if not assigned(ref.symboldata) and
-           not(ref.refaddr in [addr_gotpageoffset,addr_gotpage,addr_pageoffset,addr_page]) then
+           not(ref.refaddr in [addr_pic,addr_gotpageoffset,addr_gotpage,addr_pageoffset,addr_page]) then
           exit;
         { can't use pre-/post-indexed mode here (makes no sense either) }
         if ref.addressmode<>AM_OFFSET then
           exit;
         { "ldr literal" must be a 32/64 bit LDR and have a symbol }
-        if assigned(ref.symboldata) and
-           ((op<>A_LDR) or
+        if (ref.refaddr=addr_pic) and
+           (not (op in [A_LDR,A_B,A_BL]) or
             not(oppostfix in [PF_NONE,PF_W,PF_SW]) or
-            not assigned(ref.symbol)) then
+            (not assigned(ref.symbol) and
+             not assigned(ref.symboldata))) then
           exit;
         { if this is a (got) page offset load, we must have a base register and a
-          symbol }
+          symbol (except if we have an ADD with a non-got page offset load) }
         if (ref.refaddr in [addr_gotpageoffset,addr_pageoffset]) and
-           (not assigned(ref.symbol) or
-            (ref.base=NR_NO) or
+           (
+             (
+               (
+                 (op<>A_ADD) or
+                 (ref.refaddr=addr_gotpageoffset)
+               ) and
+               (
+                 not assigned(ref.symbol) or
+                 (ref.base=NR_NO)
+               )
+             ) or
+             (
+               (
+                 (op=A_ADD) and
+                 (ref.refaddr=addr_pageoffset)
+               ) and
+               not assigned(ref.symbol) and
+               (ref.base=NR_NO)
+             ) or
             (ref.index<>NR_NO) or
             (ref.offset<>0)) then
           begin
@@ -866,10 +908,13 @@ implementation
     function taicpu.spilling_get_operation_type(opnr: longint): topertype;
       begin
         case opcode of
-          A_B,A_BL,
+          A_B,A_BL,A_BR,A_BLR,
           A_CMN,A_CMP,
           A_CCMN,A_CCMP,
-          A_TST:
+          A_TST,
+          A_FCMP,A_FCMPE,
+          A_CBZ,A_CBNZ,
+          A_RET:
             result:=operand_read;
           A_STR,A_STUR:
             if opnr=0 then
@@ -902,11 +947,78 @@ implementation
                  { check for pre/post indexed in spilling_get_operation_type_ref }
                  result:=operand_read;
              end;
+{$ifdef EXTDEBUG}
+           { play safe to avoid hard-to-find bugs: in EXTDEBUG builds every
+             opcode that writes operand 0 and only reads the remaining
+             operands has to be listed here explicitly; anything not listed
+             ends in an internal error instead of silently getting the
+             default treatment.
+             Note: A_SXT/A_UXT no longer exist, they were split into
+             A_SXTB/A_SXTH/A_SXTW and A_UXTB/A_UXTH in a64op.inc. }
+           A_ADD,
+           A_ADRP,
+           A_AND,
+           A_ASR,
+           A_BFI,
+           A_BFXIL,
+           A_CLZ,
+           A_CSEL,
+           A_CSET,
+           A_CSETM,
+           A_FABS,
+           A_EON,
+           A_EOR,
+           A_FADD,
+           A_FCVT,
+           A_FDIV,
+           A_FMADD,
+           A_FMOV,
+           A_FMSUB,
+           A_FMUL,
+           A_FNEG,
+           A_FNMADD,
+           A_FNMSUB,
+           A_FRINTX,
+           A_FSQRT,
+           A_FSUB,
+           A_ORR,
+           A_LSL,
+           A_LSLV,
+           A_LSR,
+           A_LSRV,
+           A_MOV,
+           A_MOVK,
+           A_MOVN,
+           A_MOVZ,
+           A_MSUB,
+           A_MUL,
+           A_MVN,
+           A_NEG,
+           A_LDR,
+           A_LDUR,
+           A_RBIT,
+           A_ROR,
+           A_RORV,
+           A_SBFX,
+           A_SCVTF,
+           A_FCVTZS,
+           A_SDIV,
+           A_SMULL,
+           A_SUB,
+           A_SXTB,
+           A_SXTH,
+           A_SXTW,
+           A_UBFIZ,
+           A_UBFX,
+           A_UCVTF,
+           A_UDIV,
+           A_UMULL,
+           A_UXTB,
+           A_UXTH:
+             if opnr=0 then
+               result:=operand_write
+             else
+               result:=operand_read;
+           else
+             Internalerror(2019090802);
+{$else EXTDEBUG}
+           { release builds: assume operand 0 is written and all other
+             operands are read }
+           else
+             if opnr=0 then
+               result:=operand_write
+             else
+               result:=operand_read;
+{$endif EXTDEBUG}
         end;
       end;
 

+ 512 - 10
compiler/aarch64/agcpugas.pas

@@ -30,7 +30,7 @@ unit agcpugas;
 
     uses
        globtype,systems,
-       aasmtai,
+       aasmtai,aasmdata,aasmbase,
        assemble,aggas,
        cpubase,cpuinfo;
 
@@ -47,14 +47,22 @@ unit agcpugas;
         constructor CreateWithWriter(info: pasminfo; wr: TExternalAssemblerOutputFile; freewriter, smart: boolean); override;
       end;
 
+      { AArch64 assembler writer for Clang, derived from TAArch64Assembler.
+        It overrides the section flag formatting and WriteAsmList and has a
+        private pass that processes the SEH directives of an assembler list. }
+      TAArch64ClangGASAssembler=class(TAArch64Assembler)
+      private
+        { rewrites the SEH directives found in list (see implementation) }
+        procedure TransformSEHDirectives(list:TAsmList);
+      protected
+        function sectionflags(secflags:TSectionFlags):string;override;
+      public
+        procedure WriteAsmList; override;
+      end;
 
     const
       gas_shiftmode2str : array[tshiftmode] of string[4] = (
-        '','lsl','lsr','asr',
+        '','lsl','lsr','asr','ror',
         'uxtb','uxth','uxtw','uxtx',
         'sxtb','sxth','sxtw','sxtx');
 
-    const 
+    const
       cputype_to_gas_march : array[tcputype] of string = (
         '', // cpu_none
         'armv8'
@@ -63,7 +71,7 @@ unit agcpugas;
   implementation
 
     uses
-       cutils,globals,verbose,
+       cutils,cclasses,globals,verbose,
        aasmcpu,
        itcpugas,
        cgbase,cgutils;
@@ -90,6 +98,474 @@ unit agcpugas;
       end;
 
 
+{****************************************************************************}
+{                      CLang AArch64 Assembler writer                        }
+{****************************************************************************}
+
+    procedure TAArch64ClangGASAssembler.TransformSEHDirectives(list:TAsmList);
+
+      { Converts the SEH directives collected in list into ARM64 Windows
+        unwind codes.  The list is walked from its last entry to its first
+        and one unwind code is appended to the result per directive;
+        multi-byte codes are converted with NtoBE before being written.
+        An entry that is not a SEH directive, an unsupported directive kind,
+        a misaligned or out-of-range offset or an unsuitable register raises
+        an internal error.  The returned array is created here and has to be
+        freed by the caller. }
+      function convert_unwinddata(list:tasmlist):tdynamicarray;
+
+        { offsets must be a multiple of 8 and not larger than max }
+        procedure check_offset(ofs,max:dword);
+          begin
+            if ((ofs and $7)<>0) or (ofs>max) then
+              internalerror(2020041210);
+          end;
+
+        { reg must be of register type rt and its super register >= min }
+        procedure check_reg(reg:tregister;rt:TRegisterType;min:TSuperRegister);
+          begin
+            if (getregtype(reg)<>rt) or (getsupreg(reg)<min) then
+              internalerror(2020041211);
+          end;
+
+        procedure writebyte(b:byte); inline;
+          begin
+            result.write(b,sizeof(b));
+          end;
+
+        procedure writeword(w:word);
+          begin
+            w:=NtoBE(w);
+            result.write(w,sizeof(w));
+          end;
+
+        procedure writedword(dw:dword);
+          begin
+            dw:=NtoBE(dw);
+            result.write(dw,sizeof(dw));
+          end;
+
+        const
+          { register fields are encoded relative to the first register
+            accepted by check_reg: x19 for integer and d8 for vector registers }
+          min_int_reg = 19;
+          min_mm_reg = 8;
+        var
+          hp : tai;
+          seh : tai_seh_directive absolute hp;
+        begin
+          result:=tdynamicarray.create(0);
+          hp:=tai(list.last);
+          while assigned(hp) do
+            begin
+              if hp.typ<>ait_seh_directive then
+                internalerror(2020041502);
+              case seh.kind of
+                ash_stackalloc:
+                  begin
+                    if (seh.data.offset and $f)<>0 then
+                      internalerror(2020041207);
+                    { alloc_s: 000xxxxx }
+                    if seh.data.offset<((1 shl 5)*16) then
+                      writebyte(byte(seh.data.offset shr 4))
+                    { alloc_m: 11000xxx'xxxxxxxx }
+                    else if seh.data.offset<((1 shl 11)*16) then
+                      writeword($C000 or word(seh.data.offset shr 4))
+                    { alloc_l: 11100000 followed by 24 bits }
+                    else if seh.data.offset<((1 shl 24)*16) then
+                      writedword($E0000000 or (seh.data.offset shr 4))
+                    else begin
+                      writeln(hexstr(seh.data.offset,8));
+                      internalerror(2020041209);
+                    end;
+                  end;
+                ash_addfp:
+                  begin
+                    check_offset(seh.data.offset,(1 shl 7)*8);
+                    { add_fp: 11100010'xxxxxxxx }
+                    writeword($E200 or (seh.data.offset shr 3));
+                  end;
+                ash_setfp:
+                  writebyte($E1);
+                ash_nop:
+                  writebyte($E3);
+                ash_savefplr:
+                  begin
+                    check_offset(seh.data.offset,504);
+                    { save_fplr: 01zzzzzz }
+                    writebyte($40 or (seh.data.offset shr 3));
+                  end;
+                ash_savefplr_x:
+                  begin
+                    check_offset(seh.data.offset,512);
+                    { save_fplr_x: 10zzzzzz with z = offset/8-1; "or" and "-"
+                      have the same precedence in Pascal, so the subtraction
+                      is parenthesised explicitly }
+                    writebyte($80 or ((seh.data.offset shr 3)-1));
+                  end;
+                ash_savereg:
+                  begin
+                    check_offset(seh.data.offset,504);
+                    check_reg(seh.data.reg,R_INTREGISTER,min_int_reg);
+                    { save_reg: 110100xx'xxzzzzzz ($C000 would be alloc_m) }
+                    writeword($D000 or ((getsupreg(seh.data.reg)-min_int_reg) shl 6) or (seh.data.offset shr 3));
+                  end;
+                ash_savereg_x:
+                  begin
+                    check_offset(seh.data.offset,256);
+                    check_reg(seh.data.reg,R_INTREGISTER,min_int_reg);
+                    { save_reg_x: 1101010x'xxxzzzzz }
+                    writeword($D400 or ((getsupreg(seh.data.reg)-min_int_reg) shl 5) or ((seh.data.offset shr 3)-1));
+                  end;
+                ash_saveregp:
+                  begin
+                    check_offset(seh.data.offset,504);
+                    check_reg(seh.data.reg,R_INTREGISTER,min_int_reg);
+                    { save_regp: 110010xx'xxzzzzzz }
+                    writeword($C800 or ((getsupreg(seh.data.reg)-min_int_reg) shl 6) or (seh.data.offset shr 3));
+                  end;
+                ash_saveregp_x:
+                  begin
+                    check_offset(seh.data.offset,512);
+                    check_reg(seh.data.reg,R_INTREGISTER,min_int_reg);
+                    { save_regp_x: 110011xx'xxzzzzzz }
+                    writeword($CC00 or ((getsupreg(seh.data.reg)-min_int_reg) shl 6) or ((seh.data.offset shr 3)-1));
+                  end;
+                ash_savefreg:
+                  begin
+                    check_offset(seh.data.offset,504);
+                    check_reg(seh.data.reg,R_MMREGISTER,min_mm_reg);
+                    { save_freg: 1101110x'xxzzzzzz }
+                    writeword($DC00 or ((getsupreg(seh.data.reg)-min_mm_reg) shl 6) or (seh.data.offset shr 3));
+                  end;
+                ash_savefreg_x:
+                  begin
+                    check_offset(seh.data.offset,256);
+                    check_reg(seh.data.reg,R_MMREGISTER,min_mm_reg);
+                    { save_freg_x: 11011110'xxxzzzzz }
+                    writeword($DE00 or ((getsupreg(seh.data.reg)-min_mm_reg) shl 5) or ((seh.data.offset shr 3)-1));
+                  end;
+                ash_savefregp:
+                  begin
+                    check_offset(seh.data.offset,504);
+                    check_reg(seh.data.reg,R_MMREGISTER,min_mm_reg);
+                    { save_fregp: 1101100x'xxzzzzzz }
+                    writeword($D800 or ((getsupreg(seh.data.reg)-min_mm_reg) shl 6) or (seh.data.offset shr 3));
+                  end;
+                ash_savefregp_x:
+                  begin
+                    check_offset(seh.data.offset,512);
+                    check_reg(seh.data.reg,R_MMREGISTER,min_mm_reg);
+                    { save_fregp_x: 1101101x'xxzzzzzz; the register field is
+                      relative to min_mm_reg like for the other vector
+                      register codes (it was min_int_reg by mistake) }
+                    writeword($DA00 or ((getsupreg(seh.data.reg)-min_mm_reg) shl 6) or ((seh.data.offset shr 3)-1));
+                  end;
+                else
+                  internalerror(2020041503);
+              end;
+              hp:=tai(hp.previous);
+            end;
+        end;
+
+      var
+        unwinddata : tdynamicarray;
+
+      { appends the single byte b to the unwinddata array }
+      procedure writebyte(b:byte);
+        begin
+          unwinddata.write(b,sizeof(b));
+        end;
+
+      var
+        hp,hpnext,hpdata : tai;
+        seh : tai_seh_directive absolute hp;
+        lastsym : tai_symbol;
+        lastsec : tai_section;
+        inprologue,
+        inhandlerdata,
+        deleteai : boolean;
+        totalcount,
+        instrcount,
+        datacount : sizeint;
+        handlername : tsymstr;
+        handlerflags : byte;
+        handlerdata : array of tai;
+        handlerdataidx : sizeint;
+        handlerdatacount : tai;
+        sehlist,
+        tmplist : TAsmList;
+        xdatasym : tasmsymbol;
+        unwindread,
+        unwindrec : longword;
+      begin
+        if not assigned(list) then
+          exit;
+
+        lastsym:=nil;
+        tmplist:=nil;
+        sehlist:=nil;
+        lastsec:=nil;
+        instrcount:=0;
+        datacount:=0;
+        unwinddata:=nil;
+        inhandlerdata:=false;
+        inprologue:=false;
+        handlerdata:=nil;
+        handlerdataidx:=0;
+        handlerdatacount:=nil;
+        handlerflags:=0;
+        handlername:='';
+
+        hp:=tai(list.first);
+        while assigned(hp) do
+          begin
+            deleteai:=false;
+            case hp.typ of
+              ait_section:
+                begin
+                  if assigned(sehlist) then
+                    begin
+                      if assigned(lastsec) and (tai_section(hp).name^=lastsec.name^) then
+                        begin
+                          { this section was only added due to the now removed SEH data }
+                          deleteai:=true;
+                          dec(list.section_count);
+                        end
+                      else
+                        internalerror(2020041214);
+                    end
+                  else
+                    lastsec:=tai_section(hp);
+
+                  if assigned(tmplist) then
+                    begin
+                      list.insertListBefore(hp,tmplist);
+                      tmplist.free;
+                      tmplist:=nil;
+                    end;
+                end;
+              ait_symbol:
+                begin
+                  if tai_symbol(hp).is_global then
+                    lastsym:=tai_symbol(hp);
+                end;
+              ait_instruction:
+                if assigned(sehlist) then
+                  inc(instrcount);
+              ait_const:
+                if assigned(sehlist) then
+                  inc(datacount,tai_const(hp).size);
+              ait_seh_directive:
+                begin
+                  if not assigned(sehlist) and (seh.kind<>ash_proc) then
+                    internalerror(2020041208);
+                  { most seh directives are removed }
+                  deleteai:=true;
+                  case seh.kind of
+                    ash_proc:
+                      begin
+                        if not assigned(lastsec) then
+                          internalerror(2020041203);
+                        datacount:=0;
+                        instrcount:=0;
+                        handlerflags:=0;
+                        handlername:='';
+                        sehlist:=tasmlist.create;
+                        inprologue:=true;
+                      end;
+                    ash_endproc:
+                      begin
+                        if not assigned(sehlist) then
+                          internalerror(2020041501);
+                        if assigned(tmplist) then
+                          internalerror(2020041302);
+                        if not assigned(lastsym) then
+                          internalerror(2020041303);
+                        if inprologue then
+                          cgmessage(asmw_e_missing_endprologue);
+
+                        unwinddata:=convert_unwinddata(sehlist);
+
+                        writebyte($E4);
+
+                        { fill up with NOPs }
+                        while unwinddata.size mod 4<>0 do
+                          writebyte($E3);
+
+                        { note: we can pass Nil here, because in case of a LLVM
+                                backend this whole code shouldn't be required
+                                anyway }
+                        xdatasym:=current_asmdata.DefineAsmSymbol('xdata_'+lastsec.name^,AB_LOCAL,AT_DATA,nil);
+
+                        tmplist:=tasmlist.create;
+                        new_section(tmplist,sec_pdata,lastsec.name^,0);
+                        tmplist.concat(tai_const.Create_rva_sym(lastsym.sym));
+                        tmplist.concat(tai_const.Create_rva_sym(xdatasym));
+
+                        new_section(tmplist,sec_rodata,xdatasym.name,0);
+                        tmplist.concat(tai_symbol.Create(xdatasym,0));
+
+                        tmplist.concat(tai_comment.Create(strpnew('instr: '+tostr(instrcount)+', data: '+tostr(datacount)+', unwind: '+tostr(unwinddata.size))));
+
+                        {$ifdef EXTDEBUG}
+                        comment(V_Debug,'got section: '+lastsec.name^);
+                        comment(V_Debug,'got instructions: '+tostr(instrcount));
+                        comment(V_Debug,'got data: '+tostr(datacount));
+                        comment(V_Debug,'got unwinddata: '+tostr(unwinddata.size));
+                        {$endif EXTDEBUG}
+
+                        if datacount mod 4<>0 then
+                          cgmessage(asmw_e_seh_invalid_data_size);
+
+                        totalcount:=datacount div 4+instrcount;
+
+                        { splitting to multiple pdata/xdata sections is not yet
+                          supported, so 1 MB is our limit for now }
+                        if totalcount>(1 shl 18) then
+                          comment(V_Error,'Function is larger than 1 MB which is not supported for SEH currently');
+
+                        unwindrec:=min(totalcount,(1 shl 18)-1);
+                        if handlerflags<>0 then
+                          unwindrec:=unwindrec or (1 shl 20);
+
+                        { currently we only have one epilog, so E needs to be
+                          set to 1 and epilog scope index needs to be 0, no
+                          matter if we require the extension for the unwinddata
+                          or not }
+                        unwindrec:=unwindrec or (1 shl 21);
+
+                        if unwinddata.size div 4<=31 then
+                          unwindrec:=unwindrec or ((unwinddata.size div 4) shl 27);
+
+                        { exception record headers }
+                        tmplist.concat(tai_const.Create_32bit(unwindrec));
+                        if cs_asm_source in init_settings.globalswitches then
+                          tmplist.concat(tai_comment.create(strpnew(hexstr(unwindrec,8))));
+
+                        if unwinddata.size div 4>31 then
+                          begin
+                            { once we're able to split a .pdata entry this can be
+                              removed as well }
+                            if unwinddata.size div 4>255 then
+                              comment(V_Error,'Too many unwind codes for SEH');
+                            unwindrec:=(unwinddata.size div 4) shl 16;
+                            tmplist.concat(tai_const.create_32bit(unwindrec));
+                            if cs_asm_source in init_settings.globalswitches then
+                              tmplist.concat(tai_comment.create(strpnew(hexstr(unwindrec,8))));
+                          end;
+
+                        { unwind codes }
+                        unwinddata.seek(0);
+                        while unwinddata.pos<unwinddata.size do
+                          begin
+                            unwinddata.read(unwindrec,sizeof(longword));
+                            tmplist.concat(tai_const.Create_32bit(unwindrec));
+                            if cs_asm_source in init_settings.globalswitches then
+                              tmplist.concat(tai_comment.create(strpnew(hexstr(unwindrec,8))));
+                          end;
+                        unwinddata.free;
+
+                        if handlerflags<>0 then
+                          begin
+                            tmplist.concat(tai_const.Create_rva_sym(current_asmdata.RefAsmSymbol(handlername,AT_FUNCTION,false)));
+                            if length(handlerdata)>0 then
+                              begin
+                                tmplist.concat(handlerdatacount);
+                                for handlerdataidx:=0 to high(handlerdata) do
+                                  tmplist.concat(handlerdata[handlerdataidx]);
+                              end;
+                          end;
+
+                        handlerdata:=nil;
+
+                        sehlist.free;
+                        sehlist:=nil;
+                      end;
+                    ash_endprologue:
+                      inprologue:=false;
+                    ash_handler:
+                      begin
+                        handlername:=seh.data.name^;
+                        handlerflags:=seh.data.flags;
+                      end;
+                    ash_handlerdata:
+                      begin
+                        if handlername='' then
+                          cgmessage(asmw_e_handlerdata_no_handler);
+                        hpdata:=tai(hp.next);
+                        if not assigned(hpdata) or (hpdata.typ<>ait_const) or (tai_const(hpdata).consttype<>aitconst_32bit) then
+                          internalerror(2020041215);
+                        handlerdatacount:=hpdata;
+                        setlength(handlerdata,tai_const(hpdata).value*4);
+                        handlerdataidx:=0;
+                        hpnext:=tai(hpdata.next);
+                        list.remove(hpdata);
+                        hpdata:=hpnext;
+                        while (handlerdataidx<length(handlerdata)) and assigned(hpdata) do
+                          begin
+                            if (hpdata.typ<>ait_const) or not (tai_const(hpdata).consttype in [aitconst_32bit,aitconst_rva_symbol]) then
+                              internalerror(2020041212);
+                            handlerdata[handlerdataidx]:=hpdata;
+                            inc(handlerdataidx);
+                            hpnext:=tai(hpdata.next);
+                            list.remove(hpdata);
+                            hpdata:=hpnext;
+                          end;
+                        if handlerdataidx<length(handlerdata) then
+                          internalerror(2020041213);
+                      end;
+                    ash_stackalloc,
+                    ash_addfp,
+                    ash_setfp,
+                    ash_nop,
+                    ash_savefplr,
+                    ash_savefplr_x,
+                    ash_savereg,
+                    ash_savereg_x,
+                    ash_saveregp,
+                    ash_saveregp_x,
+                    ash_savefreg,
+                    ash_savefreg_x,
+                    ash_savefregp,
+                    ash_savefregp_x:
+                      begin
+                        if not assigned(sehlist) then
+                          internalerror(2020041504);
+                        if not inprologue then
+                          internalerror(2020041505);
+                        hpdata:=hp;
+                        hp:=tai(hp.previous);
+                        list.Remove(hpdata);
+                        sehlist.concat(hpdata);
+                        { don't delete this }
+                        deleteai:=false;
+                      end;
+                    else
+                      internalerror(2020041206);
+                  end;
+                end;
+              else
+                { ignore }
+                ;
+            end;
+
+            if deleteai then
+              begin
+                hpnext:=tai(hp.next);
+                list.remove(hp);
+                hp.free;
+                hp:=hpnext;
+              end
+            else
+              hp:=tai(hp.next);
+          end;
+
+        if assigned(sehlist) then
+          internalerror(2020041205);
+
+        if assigned(tmplist) then
+          begin
+            list.concatlist(tmplist);
+            tmplist.free;
+          end;
+      end;
+
+
+    { Returns the inherited section flag string for secflags; on aarch64-win64
+      an "r" is appended when the section is not writable. }
+    function TAArch64ClangGASAssembler.sectionflags(secflags:TSectionFlags):string;
+      var
+        needs_explicit_read: boolean;
+      begin
+        Result:=inherited sectionflags(secflags);
+        { we require an explicit "r" if write is not allowed }
+        needs_explicit_read:=(target_info.system=system_aarch64_win64) and
+                             not (SF_W in secflags);
+        if needs_explicit_read then
+          Result:=Result+'r';
+      end;
+
+
+    { Writes the assembler lists. On aarch64-win64 the SEH directives in the
+      al_pure_assembler and al_procedures lists are first rewritten by
+      TransformSEHDirectives, then the inherited writer is invoked. }
+    procedure TAArch64ClangGASAssembler.WriteAsmList;
+      begin
+        { clang does not support all the directives we need, so we need to
+          manually transform them to pdata/xdata records }
+        if target_info.system=system_aarch64_win64 then
+          begin
+            TransformSEHDirectives(current_asmdata.AsmLists[al_pure_assembler]);
+            TransformSEHDirectives(current_asmdata.AsmLists[al_procedures]);
+          end;
+        inherited WriteAsmList;
+      end;
+
+
 {****************************************************************************}
 {                  Helper routines for Instruction Writer                    }
 {****************************************************************************}
@@ -119,9 +595,13 @@ unit agcpugas;
                     result:=ref.symbol.name+darwin_addrpage2str[ref.refaddr]
                   else
                     result:=linux_addrpage2str[ref.refaddr]+ref.symbol.name
-                end
+                end;
+              addr_pic,
+              { for locals replaced by temp symbols on LLVM }
+              addr_no:
+                result:=ref.symbol.name;
               else
-                internalerror(2015022301);
+                internalerror(2015022302);
             end
           end
         else
@@ -236,6 +716,11 @@ unit agcpugas;
               end
             else
               getopstr:=getreferencestring(asminfo,o.ref^);
+          top_realconst:
+            begin
+              str(o.val_real,Result);
+              Result:='#'+Result;
+            end
           else
             internalerror(2014121507);
         end;
@@ -279,25 +764,42 @@ unit agcpugas;
             supported_targets : [system_aarch64_linux,system_aarch64_android];
             flags : [af_needar,af_smartlink_sections];
             labelprefix : '.L';
+            labelmaxlen : -1;
             comment : '// ';
             dollarsign: '$';
           );
 
+       { Assembler description using the "clang" binary for the aarch64 iOS and
+         Darwin targets; the target is passed via -target $TRIPLET. }
        as_aarch64_clang_darwin_info : tasminfo =
           (
-            id     : as_clang;
+            id     : as_clang_asdarwin;
             idtxt  : 'CLANG';
             asmbin : 'clang';
-            asmcmd : '-c -o $OBJ $EXTRAOPT -arch arm64 $DARWINVERSION -x assembler $ASM';
-            supported_targets : [system_aarch64_darwin];
-            flags : [af_needar,af_smartlink_sections,af_supports_dwarf];
+            asmcmd : '-x assembler -c -target $TRIPLET -o $OBJ $EXTRAOPT -x assembler $ASM';
+            supported_targets : [system_aarch64_ios,system_aarch64_darwin];
+            flags : [af_needar,af_smartlink_sections,af_supports_dwarf,af_llvm];
             labelprefix : 'L';
+            labelmaxlen : -1;
             comment : '# ';
             dollarsign: '$';
           );
 
+       { Assembler description using the "clang" binary with GAS-style output
+         ('.L' label prefix, '// ' comments) for the aarch64-win64 target; it is
+         registered below with TAArch64ClangGASAssembler. }
+       as_aarch64_clang_gas_info : tasminfo =
+          (
+            id     : as_clang_gas;
+            idtxt  : 'CLANG';
+            asmbin : 'clang';
+            asmcmd : '-x assembler -c -target $TRIPLET -o $OBJ $EXTRAOPT -x assembler $ASM';
+            supported_targets : [system_aarch64_win64];
+            flags : [af_needar,af_smartlink_sections,af_supports_dwarf,af_llvm];
+            labelprefix : '.L';
+            labelmaxlen : -1;
+            comment : '// ';
+            dollarsign: '$';
+          );
+
 
 begin
   RegisterAssembler(as_aarch64_gas_info,TAArch64Assembler);
   RegisterAssembler(as_aarch64_clang_darwin_info,TAArch64AppleAssembler);
+  RegisterAssembler(as_aarch64_clang_gas_info,TAArch64ClangGASAssembler);
 end.

+ 472 - 40
compiler/aarch64/aoptcpu.pas

@@ -32,33 +32,33 @@ Interface
     uses
       globtype, globals,
       cutils,
-      cgbase, cpubase, aasmtai, aasmcpu, aopt, aoptcpub;
+      cgbase, cpubase, aasmtai, aasmcpu,
+      aopt, aoptcpub, aoptarm;
 
     Type
-      TCpuAsmOptimizer = class(TAsmOptimizer)
+      TCpuAsmOptimizer = class(TARMAsmOptimizer)
         { uses the same constructor as TAopObj }
         function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
-        procedure PeepHoleOptPass2;override;
-        function GetNextInstructionUsingReg(Current : tai; out Next : tai; reg : TRegister) : Boolean;
+        function PostPeepHoleOptsCpu(var p: tai): boolean; override;
+        function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;override;
+        function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;override;
         function LookForPostindexedPattern(p : taicpu) : boolean;
-        procedure DebugMsg(const s : string; p : tai);
+      private
+        function OptPass1Shift(var p: tai): boolean;
+        function OptPostCMP(var p: tai): boolean;
+        function OptPass1Data(var p: tai): boolean;
+        function RemoveSuperfluousFMov(const p: tai; movp: tai; const optimizer: string): boolean;
+        function OptPass1STP(var p: tai): boolean;
+        function OptPass1Mov(var p: tai): boolean;
       End;
 
 Implementation
 
   uses
-    aasmbase;
-
-{$ifdef DEBUG_AOPTCPU}
-  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
-    begin
-      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
-    end;
-{$else DEBUG_AOPTCPU}
-  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
-    begin
-    end;
-{$endif DEBUG_AOPTCPU}
+    aasmbase,
+    aoptutils,
+    cgutils,
+    verbose;
 
   function CanBeCond(p : tai) : boolean;
     begin
@@ -66,35 +66,86 @@ Implementation
     end;
 
 
-  function MatchInstruction(const instr: tai; const op: TAsmOps; const postfix: TOpPostfixes): boolean;
+  { Returns true if instruction hp loads reg with a new value.
+
+    reg : register to look for
+    hp  : list entry to inspect; anything that is not an instruction yields false
+
+    The result is true when operand 0 of hp is the register reg, or when
+    operand 0 is a pre-/post-indexed reference whose base is reg. The opcodes
+    listed below, which do not write their first operand, and instructions
+    without any operand always yield false. }
+  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
+    var
+      p: taicpu;
     begin
-      result :=
-        (instr.typ = ait_instruction) and
-        ((op = []) or (taicpu(instr).opcode in op)) and
-        ((postfix = []) or (taicpu(instr).oppostfix in postfix));
-    end;
+      p := taicpu(hp);
+      Result := false;
+      if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
+        exit;
 
+      case p.opcode of
+        { These operands do not write into a register at all }
+        A_CMP, A_CMN, A_TST, A_B, A_BL, A_MSR, A_FCMP:
+          exit;
+        {Take care of post/preincremented store and loads, they will change their base register}
+        A_STR, A_LDR:
+          begin
+            Result := false;
+            { actually, this does not apply here because post-/preindexed does not mean that a register
+              is loaded with a new value, it is only modified
+              (taicpu(p).oper[1]^.typ=top_ref) and
+              (taicpu(p).oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
+              (taicpu(p).oper[1]^.ref^.base = reg);
+            }
+            { STR does not load into its first register }
+            if p.opcode = A_STR then
+              exit;
+          end;
+        else
+          ;
+      end;
 
-  function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
-    begin
-      result :=
-        (instr.typ = ait_instruction) and
-        (taicpu(instr).opcode = op) and
-        ((postfix = []) or (taicpu(instr).oppostfix in postfix));
+      { instructions without operands (e.g. a plain RET) have no oper[0] that
+        could be inspected below }
+      if p.ops=0 then
+        exit;
+
+      case p.oper[0]^.typ of
+        top_reg:
+          Result := (p.oper[0]^.reg = reg);
+        top_ref:
+          Result :=
+            (taicpu(p).oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
+            (taicpu(p).oper[0]^.ref^.base = reg);
+        else
+          ;
+      end;
     end;
 
 
-  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
-    Out Next: tai; reg: TRegister): Boolean;
+  { Returns true if hp is an instruction and one of its scanned operands is
+    the register reg, or a reference using reg as base or index.
+    Operand 0 is only scanned when spilling_get_operation_type(0) reports it
+    as read or read-write; otherwise scanning starts at operand 1. }
+  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
+    var
+      p: taicpu;
+      i: longint;
     begin
-      Next:=Current;
-      repeat
-        Result:=GetNextInstruction(Next,Next);
-      until not (Result) or
-            not(cs_opt_level3 in current_settings.optimizerswitches) or
-            (Next.typ<>ait_instruction) or
-            RegInInstruction(reg,Next) or
-            is_calljmp(taicpu(Next).opcode);
+      instructionLoadsFromReg := false;
+      if not (assigned(hp) and (hp.typ = ait_instruction)) then
+        exit;
+      p:=taicpu(hp);
+
+      { by default the first operand is skipped }
+      i:=1;
+
+      { Start on oper[0]? }
+      if taicpu(hp).spilling_get_operation_type(0) in [operand_read, operand_readwrite] then
+        i:=0;
+
+      while(i<p.ops) do
+        begin
+          case p.oper[I]^.typ of
+            top_reg:
+              Result := (p.oper[I]^.reg = reg);
+            top_ref:
+              Result :=
+                (p.oper[I]^.ref^.base = reg) or
+                (p.oper[I]^.ref^.index = reg);
+            else
+              ;
+          end;
+          { Bailout if we found something }
+          if Result then
+            exit;
+          Inc(I);
+        end;
     end;
 
   {
@@ -145,7 +196,336 @@ Implementation
     end;
 
 
+  { Tries to fold
+      <op>  reg0, ...
+      fmov  reg2, reg0
+    into
+      <op>  reg2, ...
+
+    p         : instruction whose first operand (reg0) is the source of the FMOV
+    movp      : candidate FMOV instruction
+    optimizer : name that is only used in the debug message
+
+    The fold is only done if a deallocation of reg0 is found after movp.
+    Returns true if movp was removed (and freed). }
+  function TCpuAsmOptimizer.RemoveSuperfluousFMov(const p: tai; movp: tai; const optimizer: string):boolean;
+    var
+      alloc,
+      dealloc : tai_regalloc;
+      hp1 : tai;
+    begin
+      Result:=false;
+      if ((MatchInstruction(movp, A_FMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
+           ((getregtype(taicpu(movp).oper[0]^.reg)=R_MMREGISTER) { or (taicpu(p).opcode in [A_LDUR])})
+          ) { or
+          (((taicpu(p).oppostfix in [PF_F64F32,PF_F64S16,PF_F64S32,PF_F64U16,PF_F64U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFD)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F64])) or
+          (((taicpu(p).oppostfix in [PF_F32F64,PF_F32S16,PF_F32S32,PF_F32U16,PF_F32U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFS)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F32])) }
+         ) and
+         (taicpu(movp).ops=2) and
+         MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
+         { the destination register of the mov must not be used between p and movp }
+         not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
+         { Take care to only do this for instructions which REALLY load to the first register.
+           Otherwise
+             str reg0, [reg1]
+             fmov reg2, reg0
+           will be optimized to
+             str reg2, [reg1]
+         }
+         RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
+        begin
+          { reg0 has to be released after the mov, otherwise nothing is changed }
+          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
+          if assigned(dealloc) then
+            begin
+              DebugMsg('Peephole '+optimizer+' removed superfluous vmov', movp);
+              result:=true;
+
+              { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
+                and remove it if possible }
+              asml.Remove(dealloc);
+              alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
+              if assigned(alloc) then
+                begin
+                  asml.Remove(alloc);
+                  alloc.free;
+                  dealloc.free;
+                end
+              else
+                { no matching allocation found: keep the deallocation, but
+                  place it directly after p }
+                asml.InsertAfter(dealloc,p);
+
+              { try to move the allocation of the target register }
+              GetLastInstruction(movp,hp1);
+              alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
+              if assigned(alloc) then
+                begin
+                  asml.Remove(alloc);
+                  asml.InsertBefore(alloc,p);
+                  { adjust used regs }
+                  IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
+                end;
+
+              { change
+                  vldr reg0,[reg1]
+                  vmov reg2,reg0
+                into
+                  ldr reg2,[reg1]
+
+                if reg2 is an int register
+              if (taicpu(p).opcode=A_VLDR) and (getregtype(taicpu(movp).oper[0]^.reg)=R_INTREGISTER) then
+                taicpu(p).opcode:=A_LDR;
+              }
+
+              { finally get rid of the mov }
+              taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
+              asml.remove(movp);
+              movp.free;
+            end;
+        end;
+    end;
+
+
+  { Peephole optimization for LSL/LSR/ASR/ROR with p pointing to the shift.
+
+    First tries to fold an immediate shift into the instruction consuming its
+    result:
+
+      <shiftop> r0, r1, #imm
+      <op> r2, r3, r0
+    becomes
+      <op> r2, r3, r1, <shiftop> #imm
+
+    If that is not possible, it tries to remove a superfluous move of the
+    shift result (RemoveSuperfluousMove).
+
+    Returns true if something was changed; after a fold, p points to the
+    instruction that followed the removed shift. }
+  function TCpuAsmOptimizer.OptPass1Shift(var p : tai): boolean;
+    var
+      hp1,hp2: tai;
+      I: Integer;
+      shifterop: tshifterop;
+    begin
+      Result:=false;
+      { This folds shifterops into following instructions
+        <shiftop> r0, r1, #imm
+        <op> r2, r3, r0
+
+        to
+
+        <op> r2, r3, r1, <shiftop> #imm
+      }
+      { do not handle ROR yet, only part of the instructions below support ROR as shifter operand }
+      if MatchInstruction(p,[A_LSL, A_LSR, A_ASR{, A_ROR}],[PF_None]) and
+         MatchOpType(taicpu(p),top_reg,top_reg,top_const) and
+         GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+         MatchInstruction(hp1, [A_ADD, A_AND, A_BIC, A_CMP, A_CMN,
+                                A_EON, A_EOR, A_NEG, A_ORN, A_ORR,
+                                A_SUB, A_TST], [PF_None]) and
+         RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
+         (taicpu(hp1).ops >= 2) and
+         { Currently we can't fold into another shifterop }
+         (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
+         { SP does not work completely with shifted registers, as I didn't find the exact rules,
+           we do not operate on SP }
+         (taicpu(hp1).oper[0]^.reg<>NR_SP) and
+         (taicpu(hp1).oper[1]^.reg<>NR_SP) and
+         (taicpu(hp1).oper[taicpu(hp1).ops-1]^.reg<>NR_SP) and
+         { reg1 must not be modified in between }
+         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
+         (
+           { Only ONE of the two src operands is allowed to match }
+           MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
+           MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
+         ) and
+         { for the non-commutative operations (SUB, BIC, EON, ORN) the last
+           operand must match: folding a match on the first source operand
+           requires swapping both sources, which would change the result, and
+           there is no reversed form (like RSB) on AArch64 }
+         (((taicpu(hp1).opcode<>A_SUB) and
+           (taicpu(hp1).opcode<>A_BIC) and
+           (taicpu(hp1).opcode<>A_EON) and
+           (taicpu(hp1).opcode<>A_ORN)) or
+          MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)) then
+        begin
+          { for the two operand instructions, start also at the second operand as they are not always commutative
+            (depends on the flags tested later on) and thus the operands cannot be swapped }
+          for I:=1 to taicpu(hp1).ops-1 do
+            if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
+              begin
+                { If the parameter matched on the second op from the RIGHT
+                  we have to switch the parameters, this will not happen for CMP
+                  where we're only evaluating the rightmost parameter
+                }
+                shifterop_reset(shifterop);
+                case taicpu(p).opcode of
+                  A_LSL:
+                    shifterop.shiftmode:=SM_LSL;
+                  A_ROR:
+                    shifterop.shiftmode:=SM_ROR;
+                  A_LSR:
+                    shifterop.shiftmode:=SM_LSR;
+                  A_ASR:
+                    shifterop.shiftmode:=SM_ASR;
+                  else
+                    InternalError(2019090401);
+                end;
+                shifterop.shiftimm:=taicpu(p).oper[2]^.val;
+
+                if I <> taicpu(hp1).ops-1 then
+                  begin
+                    { shifted register is the first source: swap the sources
+                      (only reached for commutative operations) }
+                    if taicpu(hp1).ops = 3 then
+                      hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
+                           taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
+                           taicpu(p).oper[1]^.reg, shifterop)
+                    else
+                      hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
+                           taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
+                           shifterop);
+                  end
+                else
+                  if taicpu(hp1).ops = 3 then
+                    hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
+                         taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
+                         taicpu(p).oper[1]^.reg,shifterop)
+                  else
+                    hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
+                         taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
+                         shifterop);
+
+                taicpu(hp2).fileinfo:=taicpu(hp1).fileinfo;
+                asml.insertbefore(hp2, hp1);
+                { hp2 is reused here: it now receives the instruction following
+                  the shift, which becomes the new p }
+                GetNextInstruction(p, hp2);
+                asml.remove(p);
+                asml.remove(hp1);
+                p.free;
+                hp1.free;
+                p:=hp2;
+                DebugMsg('Peephole FoldShiftProcess done', p);
+                Result:=true;
+                break;
+              end;
+        end
+      else if MatchInstruction(p,[A_LSL, A_LSR, A_ASR,A_ROR],[PF_None]) and
+        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+        RemoveSuperfluousMove(p, hp1, 'ShiftMov2Shift') then
+        Result:=true;
+    end;
+
+
+  { Looks up the next instruction using the destination register of p and
+    lets RemoveSuperfluousMove try to fold it into p. Returns true if
+    RemoveSuperfluousMove reported a change. }
+  function TCpuAsmOptimizer.OptPass1Data(var p : tai): boolean;
+    var
+      next_user: tai;
+    begin
+      Result:=GetNextInstructionUsingReg(p, next_user, taicpu(p).oper[0]^.reg) and
+              RemoveSuperfluousMove(p, next_user, 'DataMov2Data');
+    end;
+
+
+  { Peephole optimization for STP with p pointing to the STP: turns a frame
+    setup that only wraps a single call followed by RET into a plain branch
+    (see the pattern below). Returns true if the sequence was replaced; p then
+    points to the remaining branch instruction. }
+  function TCpuAsmOptimizer.OptPass1STP(var p : tai): boolean;
+    var
+      hp1, hp2, hp3, hp4: tai;
+    begin
+      Result:=false;
+      {
+        change
+
+          stp x29,x30,[sp, #-16]!
+          mov x29,sp
+          bl  abc
+          ldp x29,x30,[sp], #16
+          ret
+
+        into
+
+          b   abc
+      }
+      if MatchInstruction(p, A_STP, [C_None], [PF_None]) and
+        MatchOpType(taicpu(p),top_reg,top_reg,top_ref) and
+        (taicpu(p).oper[0]^.reg = NR_X29) and
+        (taicpu(p).oper[1]^.reg = NR_X30) and
+        (taicpu(p).oper[2]^.ref^.base=NR_STACK_POINTER_REG) and
+        (taicpu(p).oper[2]^.ref^.index=NR_NO) and
+        (taicpu(p).oper[2]^.ref^.offset=-16) and
+        (taicpu(p).oper[2]^.ref^.addressmode=AM_PREINDEXED) and
+        { collect the four following instructions: hp1=mov, hp2=bl, hp3=ldp, hp4=ret }
+        GetNextInstruction(p, hp1) and
+        GetNextInstruction(hp1, hp2) and
+        SkipEntryExitMarker(hp2, hp2) and
+        GetNextInstruction(hp2, hp3) and
+        SkipEntryExitMarker(hp3, hp3) and
+        GetNextInstruction(hp3, hp4) and
+
+        MatchInstruction(hp1, A_MOV, [C_None], [PF_NONE]) and
+        MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
+        (taicpu(hp1).oper[1]^.typ = top_reg) and
+        (taicpu(hp1).oper[1]^.reg = NR_STACK_POINTER_REG) and
+
+        MatchInstruction(hp2, A_BL, [C_None], [PF_NONE]) and
+        (taicpu(hp2).oper[0]^.typ = top_ref) and
+
+        MatchInstruction(hp3, A_LDP, [C_None], [PF_NONE]) and
+        MatchOpType(taicpu(hp3),top_reg,top_reg,top_ref) and
+        (taicpu(hp3).oper[0]^.reg = NR_X29) and
+        (taicpu(hp3).oper[1]^.reg = NR_X30) and
+        (taicpu(hp3).oper[2]^.ref^.base=NR_STACK_POINTER_REG) and
+        (taicpu(hp3).oper[2]^.ref^.index=NR_NO) and
+        (taicpu(hp3).oper[2]^.ref^.offset=16) and
+        (taicpu(hp3).oper[2]^.ref^.addressmode=AM_POSTINDEXED) and
+
+        MatchInstruction(hp4, A_RET, [C_None], [PF_None]) and
+        (taicpu(hp4).ops = 0) then
+        begin
+          { drop the frame setup/teardown and the ret, keep the call as a branch }
+          asml.Remove(p);
+          asml.Remove(hp1);
+          asml.Remove(hp3);
+          asml.Remove(hp4);
+          taicpu(hp2).opcode:=A_B;
+          p.free;
+          hp1.free;
+          hp3.free;
+          hp4.free;
+          p:=hp2;
+          DebugMsg('Peephole Bl2B done', p);
+          Result:=true;
+        end;
+    end;
+
+
+  { Peephole optimization for MOV with p pointing to the MOV.
+    A MOV without postfix whose two operands match is removed. Otherwise the
+    next instruction using the destination register is looked up and handed to
+    RemoveSuperfluousMove and, failing that, to RedundantMovProcess.
+    Returns true if something was changed. }
+  function TCpuAsmOptimizer.OptPass1Mov(var p : tai): boolean;
+    var
+      next_user: tai;
+    begin
+     Result:=false;
+
+     { mov rX, rX }
+     if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
+       (taicpu(p).oppostfix=PF_None) then
+       begin
+         RemoveCurrentP(p);
+         DebugMsg('Peephole Mov2None done', p);
+         Result:=true;
+         exit;
+       end;
+
+     {
+       optimize
+       mov rX, yyyy
+       ....
+     }
+     if not GetNextInstructionUsingReg(p, next_user, taicpu(p).oper[0]^.reg) then
+       exit;
+
+     Result:=RemoveSuperfluousMove(p, next_user, 'MovMov2Mov') or
+             ((taicpu(p).ops = 2) and
+              (tai(next_user).typ = ait_instruction) and
+              RedundantMovProcess(p,next_user));
+    end;
+
+
+  { Post-peephole optimization for CMP with p pointing to the CMP: replaces
+      cmp   reg, #0
+      b.eq/b.ne  label
+    by
+      cbz/cbnz   reg, label
+    Returns true if the pair was replaced; p then points to the new instruction. }
+  function TCpuAsmOptimizer.OptPostCMP(var p : tai): boolean;
+    var
+     branch,fused: tai;
+     fused_op: TAsmOp;
+    begin
+      Result:=false;
+      if not MatchOpType(taicpu(p),top_reg,top_const) or
+        (taicpu(p).oper[1]^.val<>0) or
+        not GetNextInstruction(p,branch) or
+        not MatchInstruction(branch,A_B,[PF_None]) or
+        not (taicpu(branch).condition in [C_EQ,C_NE]) then
+        exit;
+
+      { pick the compare-and-branch opcode matching the branch condition }
+      case taicpu(branch).condition of
+        C_NE:
+          fused_op:=A_CBNZ;
+        C_EQ:
+          fused_op:=A_CBZ;
+        else
+          Internalerror(2019090801);
+      end;
+      fused:=taicpu.op_reg_sym_ofs(fused_op,taicpu(p).oper[0]^.reg,
+        taicpu(branch).oper[0]^.ref^.symbol,taicpu(branch).oper[0]^.ref^.offset);
+      taicpu(fused).fileinfo:=taicpu(branch).fileinfo;
+      asml.insertbefore(fused, branch);
+
+      { the compare and the conditional branch are no longer needed }
+      asml.remove(p);
+      asml.remove(branch);
+      p.free;
+      branch.free;
+      p:=fused;
+      DebugMsg('Peephole CMPB.E/NE2CBNZ/CBZ done', p);
+      Result:=true;
+    end;
+
+
   function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
+    var
+      hp1: tai;
     begin
       result := false;
       if p.typ=ait_instruction then
@@ -159,6 +539,48 @@ Implementation
               begin
                 Result:=LookForPostindexedPattern(taicpu(p));
               end;
+            A_MOV:
+              Result:=OptPass1Mov(p);
+            A_STP:
+              Result:=OptPass1STP(p);
+            A_LSR,
+            A_ROR,
+            A_ASR,
+            A_LSL:
+              Result:=OptPass1Shift(p);
+            A_AND:
+              Result:=OptPass1And(p);
+            A_ADD,
+            A_ADC,
+            A_SUB,
+            A_SBC,
+            A_BIC,
+            A_EOR,
+            A_ORR,
+            A_MUL:
+              Result:=OptPass1Data(p);
+            A_UXTB:
+              Result:=OptPass1UXTB(p);
+            A_UXTH:
+              Result:=OptPass1UXTH(p);
+            A_SXTB:
+              Result:=OptPass1SXTB(p);
+            A_SXTH:
+              Result:=OptPass1SXTH(p);
+//            A_VLDR,
+            A_FADD,
+            A_FMUL,
+            A_FDIV,
+            A_FSUB,
+            A_FSQRT,
+            A_FNEG,
+            A_FCVT,
+            A_FABS:
+              begin
+                if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+                  RemoveSuperfluousFMov(p, hp1, 'FOpFMov2FOp') then
+                  Result:=true;
+              end
             else
               ;
           end;
@@ -166,8 +588,18 @@ Implementation
     end;
 
 
-  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
+  { Post-peephole entry point: dispatches CMP instructions to OptPostCMP and
+    returns its result; everything else yields false. }
+  function TCpuAsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;
     begin
+      { CMP is the only opcode handled here }
+      if (p.typ=ait_instruction) and (taicpu(p).opcode=A_CMP) then
+        Result:=OptPostCMP(p)
+      else
+        Result:=false;
     end;
 
 begin

+ 344 - 112
compiler/aarch64/cgcpu.pas

@@ -100,6 +100,8 @@ interface
         procedure g_concatcopy_move(list: TAsmList; const source, dest: treference; len: tcgint);
         procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);override;
         procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint);override;
+        procedure g_check_for_fpu_exception(list: TAsmList; force, clear: boolean);override;
+        procedure g_profilecode(list: TAsmList);override;
        private
         function save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
         procedure load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
@@ -122,10 +124,11 @@ interface
 implementation
 
   uses
-    globals,verbose,systems,cutils,
+    globals,verbose,systems,cutils,cclasses,
     paramgr,fmodule,
     symtable,symsym,
     tgobj,
+    ncgutil,
     procinfo,cpupi;
 
 
@@ -179,7 +182,8 @@ implementation
             if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
                 (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
                ((ref.symbol.typ=AT_DATA) and
-                (ref.symbol.bind=AB_LOCAL)) then
+                (ref.symbol.bind=AB_LOCAL)) or
+               (target_info.system=system_aarch64_win64) then
               href.refaddr:=addr_page
             else
               href.refaddr:=addr_gotpage;
@@ -192,7 +196,8 @@ implementation
             if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
                 (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
                ((ref.symbol.typ=AT_DATA) and
-                (ref.symbol.bind=AB_LOCAL)) then
+                (ref.symbol.bind=AB_LOCAL)) or
+               (target_info.system=system_aarch64_win64) then
               begin
                 href.base:=NR_NO;
                 href.refaddr:=addr_pageoffset;
@@ -679,15 +684,32 @@ implementation
     procedure tcgaarch64.a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference);
       var
         reg: tregister;
+        href: treference;
+        i: Integer;
       begin
         { use the zero register if possible }
         if a=0 then
           begin
-            if size in [OS_64,OS_S64] then
-              reg:=NR_XZR
+            href:=ref;
+            inc(href.offset,tcgsize2size[size]-1);
+            if (tcgsize2size[size]>1) and (ref.alignment=1) and (simple_ref_type(A_STUR,OS_8,PF_None,ref)=sr_simple) and
+              (simple_ref_type(A_STUR,OS_8,PF_None,href)=sr_simple) then
+              begin
+                href:=ref;
+                for i:=0 to tcgsize2size[size]-1 do
+                  begin
+                    a_load_const_ref(list,OS_8,0,href);
+                    inc(href.offset);
+                  end;
+              end
             else
-              reg:=NR_WZR;
-            a_load_reg_ref(list,size,size,reg,ref);
+              begin
+                if size in [OS_64,OS_S64] then
+                  reg:=NR_XZR
+                else
+                  reg:=NR_WZR;
+                a_load_reg_ref(list,size,size,reg,ref);
+              end;
           end
         else
           inherited;
@@ -905,13 +927,13 @@ implementation
           begin
             case tosize of
               OS_8:
-                list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_B));
+                list.concat(taicpu.op_reg_reg(A_UXTB,reg2,makeregsize(reg1,OS_32)));
               OS_16:
-                list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_H));
+                list.concat(taicpu.op_reg_reg(A_UXTH,reg2,makeregsize(reg1,OS_32)));
               OS_S8:
-                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_B));
+                list.concat(taicpu.op_reg_reg(A_SXTB,reg2,makeregsize(reg1,OS_32)));
               OS_S16:
-                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_H));
+                list.concat(taicpu.op_reg_reg(A_SXTH,reg2,makeregsize(reg1,OS_32)));
               { while "mov wN, wM" automatically inserts a zero-extension and
                 hence we could encode a 64->32 bit move like that, the problem
                 is that we then can't distinguish 64->32 from 32->32 moves, and
@@ -926,7 +948,7 @@ implementation
                 list.concat(taicpu.op_reg_reg_const_const(A_UBFIZ,makeregsize(reg2,OS_64),makeregsize(reg1,OS_64),0,32));
               OS_64,
               OS_S64:
-                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_W));
+                list.concat(taicpu.op_reg_reg(A_SXTW,reg2,makeregsize(reg1,OS_32)));
               else
                 internalerror(2002090901);
             end;
@@ -1023,6 +1045,7 @@ implementation
             { Notify the register allocator that we have written a move
               instruction so it can try to eliminate it. }
             add_move_instruction(instr);
+            { FMOV cannot generate a floating point exception }
           end
         else
           begin
@@ -1030,6 +1053,7 @@ implementation
                (reg_cgsize(reg2)<>tosize) then
               internalerror(2014110913);
             instr:=taicpu.op_reg_reg(A_FCVT,reg2,reg1);
+            maybe_check_for_fpu_exception(list);
           end;
         list.Concat(instr);
       end;
@@ -1105,17 +1129,24 @@ implementation
           { "xor Vx,Vx" is used to initialize global regvars to 0 }
           OP_XOR:
             begin
-              if (src<>dst) or
+              if shuffle=nil then
+                begin
+                  dst:=newreg(R_MMREGISTER,getsupreg(dst),R_SUBMM16B);
+                  src:=newreg(R_MMREGISTER,getsupreg(src),R_SUBMM16B);
+                  list.concat(taicpu.op_reg_reg_reg(A_EOR,dst,dst,src))
+                end
+              else if (src<>dst) or
                  (reg_cgsize(src)<>size) or
                  assigned(shuffle) then
-                internalerror(2015011401);
-              case size of
-                OS_F32,
-                OS_F64:
-                  list.concat(taicpu.op_reg_const(A_MOVI,makeregsize(dst,OS_F64),0));
-                else
-                  internalerror(2015011402);
-              end;
+                internalerror(2015011401)
+              else
+                case size of
+                  OS_F32,
+                  OS_F64:
+                    list.concat(taicpu.op_reg_const(A_MOVI,makeregsize(dst,OS_F64),0));
+                  else
+                    internalerror(2015011402);
+                end;
             end
           else
             internalerror(2015011403);
@@ -1153,7 +1184,7 @@ implementation
         list.Concat(taicpu.op_reg_reg_reg_cond(A_CSINV,dst,dst,makeregsize(NR_XZR,dstsize),C_NE));
         { mask the -1 to 255 if src was 0 (anyone find a two-instruction
           branch-free version? All of mine are 3...) }
-        list.Concat(setoppostfix(taicpu.op_reg_reg(A_UXT,makeregsize(dst,OS_32),makeregsize(dst,OS_32)),PF_B));
+        list.Concat(taicpu.op_reg_reg(A_UXTB,makeregsize(dst,OS_32),makeregsize(dst,OS_32)));
       end;
 
 
@@ -1573,27 +1604,74 @@ implementation
         ref: treference;
         sr: tsuperregister;
         pairreg: tregister;
+        sehreg,sehregp : TAsmSehDirective;
       begin
         result:=0;
         reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
         ref.addressmode:=AM_PREINDEXED;
         pairreg:=NR_NO;
-        { store all used registers pairwise }
-        for sr:=lowsr to highsr do
-          if sr in rg[rt].used_in_proc then
-            if pairreg=NR_NO then
-              pairreg:=newreg(rt,sr,sub)
+        { for SEH on Win64 we can only store consecutive register pairs, others
+          need to be stored with STR }
+        if target_info.system=system_aarch64_win64 then
+          begin
+            if rt=R_INTREGISTER then
+              begin
+                sehreg:=ash_savereg_x;
+                sehregp:=ash_saveregp_x;
+              end
+            else if rt=R_MMREGISTER then
+              begin
+                sehreg:=ash_savefreg_x;
+                sehregp:=ash_savefregp_x;
+              end
             else
+              internalerror(2020041304);
+            for sr:=lowsr to highsr do
+              if sr in rg[rt].used_in_proc then
+                if pairreg=NR_NO then
+                  pairreg:=newreg(rt,sr,sub)
+                else
+                  begin
+                    inc(result,16);
+                    if getsupreg(pairreg)=sr-1 then
+                      begin
+                        list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,newreg(rt,sr,sub),ref));
+                        list.concat(cai_seh_directive.create_reg_offset(sehregp,pairreg,16));
+                        pairreg:=NR_NO;
+                      end
+                    else
+                      begin
+                        list.concat(taicpu.op_reg_ref(A_STR,pairreg,ref));
+                        list.concat(cai_seh_directive.create_reg_offset(sehreg,pairreg,16));
+                        pairreg:=newreg(rt,sr,sub);
+                      end;
+                  end;
+            if pairreg<>NR_NO then
               begin
                 inc(result,16);
-                list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,newreg(rt,sr,sub),ref));
-                pairreg:=NR_NO
+                list.concat(taicpu.op_reg_ref(A_STR,pairreg,ref));
+                list.concat(cai_seh_directive.create_reg_offset(sehreg,pairreg,16));
               end;
-        { one left -> store twice (stack must be 16 bytes aligned) }
-        if pairreg<>NR_NO then
+          end
+        else
           begin
-            list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,pairreg,ref));
-            inc(result,16);
+            { store all used registers pairwise }
+            for sr:=lowsr to highsr do
+              if sr in rg[rt].used_in_proc then
+                if pairreg=NR_NO then
+                  pairreg:=newreg(rt,sr,sub)
+                else
+                  begin
+                    inc(result,16);
+                    list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,newreg(rt,sr,sub),ref));
+                    pairreg:=NR_NO
+                  end;
+            { one left -> store twice (stack must be 16 bytes aligned) }
+            if pairreg<>NR_NO then
+              begin
+                list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,pairreg,ref));
+                inc(result,16);
+              end;
           end;
       end;
 
@@ -1614,69 +1692,124 @@ implementation
 
     procedure tcgaarch64.g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);
       var
+        hitem: tlinkedlistitem;
+        seh_proc: tai_seh_directive;
+        templist: TAsmList;
+        suppress_endprologue: boolean;
         ref: treference;
         totalstackframesize: longint;
       begin
-        if nostackframe then
-          exit;
-        { stack pointer has to be aligned to 16 bytes at all times }
-        localsize:=align(localsize,16);
+        hitem:=list.last;
+        { pi_has_unwind_info may already be set at this point if there are
+          SEH directives in assembler body. In this case, .seh_endprologue
+          is expected to be one of those directives, and not generated here. }
+        suppress_endprologue:=(pi_has_unwind_info in current_procinfo.flags);
 
-        { save stack pointer and return address }
-        reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
-        ref.addressmode:=AM_PREINDEXED;
-        list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
-        { initialise frame pointer }
-        a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
-
-        totalstackframesize:=localsize;
-        { save modified integer registers }
-        inc(totalstackframesize,
-          save_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE));
-        { only the lower 64 bits of the modified vector registers need to be
-          saved; if the caller needs the upper 64 bits, it has to save them
-          itself }
-        inc(totalstackframesize,
-          save_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD));
-
-        { allocate stack space }
-        if localsize<>0 then
+        if not nostackframe then
           begin
+            { stack pointer has to be aligned to 16 bytes at all times }
             localsize:=align(localsize,16);
-            current_procinfo.final_localsize:=localsize;
-            handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize,NR_SP,NR_IP0,false,true);
+
+            if target_info.system=system_aarch64_win64 then
+              include(current_procinfo.flags,pi_has_unwind_info);
+
+            { save stack pointer and return address }
+            reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
+            ref.addressmode:=AM_PREINDEXED;
+            list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
+            if target_info.system=system_aarch64_win64 then
+              list.concat(cai_seh_directive.create_offset(ash_savefplr_x,16));
+            { initialise frame pointer }
+            if current_procinfo.procdef.proctypeoption<>potype_exceptfilter then
+              begin
+                a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
+                if target_info.system=system_aarch64_win64 then
+                  list.concat(cai_seh_directive.create(ash_setfp));
+              end
+            else
+              begin
+                gen_load_frame_for_exceptfilter(list);
+                localsize:=current_procinfo.maxpushedparasize;
+              end;
+
+            totalstackframesize:=localsize;
+            { save modified integer registers }
+            inc(totalstackframesize,
+              save_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE));
+            { only the lower 64 bits of the modified vector registers need to be
+              saved; if the caller needs the upper 64 bits, it has to save them
+              itself }
+            inc(totalstackframesize,
+              save_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD));
+
+            { allocate stack space }
+            if localsize<>0 then
+              begin
+                localsize:=align(localsize,16);
+                current_procinfo.final_localsize:=localsize;
+                handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize,NR_SP,NR_IP0,false,true);
+                if target_info.system=system_aarch64_win64 then
+                  list.concat(cai_seh_directive.create_offset(ash_stackalloc,localsize));
+              end;
+            { By default, we use the frame pointer to access parameters passed via
+              the stack and the stack pointer to address local variables and temps
+              because
+               a) we can use bigger positive than negative offsets (so accessing
+                  locals via negative offsets from the frame pointer would be less
+                  efficient)
+               b) we don't know the local size while generating the code, so
+                  accessing the parameters via the stack pointer is not possible
+                  without copying them
+              The problem with this is the get_frame() intrinsic:
+               a) it must return the same value as what we pass as parentfp
+                  parameter, since that's how it's used in the TP-style objects unit
+               b) its return value must be usable to access all local data from a
+                  routine (locals and parameters), since it's all the nested
+                  routines have access to
+               c) its return value must be usable to construct a backtrace, as it's
+                  also used by the exception handling routines
+
+              The solution we use here, based on something similar that's done in
+              the MIPS port, is to generate all accesses to locals in the routine
+              itself SP-relative, and then after the code is generated and the local
+              size is known (namely, here), we change all SP-relative variables/
+              parameters into FP-relative ones. This means that they'll be accessed
+              less efficiently from nested routines, but those accesses are indirect
+              anyway and at least this way they can be accessed at all
+            }
+            if current_procinfo.has_nestedprocs or
+               (
+                 (target_info.system=system_aarch64_win64) and
+                 (current_procinfo.flags*[pi_has_implicit_finally,pi_needs_implicit_finally,pi_uses_exceptions]<>[])
+               ) then
+              begin
+                current_procinfo.procdef.localst.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
+                current_procinfo.procdef.parast.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
+              end;
           end;
-        { By default, we use the frame pointer to access parameters passed via
-          the stack and the stack pointer to address local variables and temps
-          because
-           a) we can use bigger positive than negative offsets (so accessing
-              locals via negative offsets from the frame pointer would be less
-              efficient)
-           b) we don't know the local size while generating the code, so
-              accessing the parameters via the stack pointer is not possible
-              without copying them
-          The problem with this is the get_frame() intrinsic:
-           a) it must return the same value as what we pass as parentfp
-              parameter, since that's how it's used in the TP-style objects unit
-           b) its return value must usable to access all local data from a
-              routine (locals and parameters), since it's all the nested
-              routines have access to
-           c) its return value must be usable to construct a backtrace, as it's
-              also used by the exception handling routines
-
-          The solution we use here, based on something similar that's done in
-          the MIPS port, is to generate all accesses to locals in the routine
-          itself SP-relative, and then after the code is generated and the local
-          size is known (namely, here), we change all SP-relative variables/
-          parameters into FP-relative ones. This means that they'll be accessed
-          less efficiently from nested routines, but those accesses are indirect
-          anyway and at least this way they can be accessed at all
-        }
-        if current_procinfo.has_nestedprocs then
+
+        if not (pi_has_unwind_info in current_procinfo.flags) then
+          exit;
+
+        { Generate unwind data for aarch64-win64 }
+        seh_proc:=cai_seh_directive.create_name(ash_proc,current_procinfo.procdef.mangledname);
+        if assigned(hitem) then
+          list.insertafter(seh_proc,hitem)
+        else
+          list.insert(seh_proc);
+        { the directive creates another section }
+        inc(list.section_count);
+        templist:=TAsmList.Create;
+
+        if not suppress_endprologue then
           begin
-            current_procinfo.procdef.localst.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
-            current_procinfo.procdef.parast.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
+            templist.concat(cai_seh_directive.create(ash_endprologue));
           end;
+        if assigned(current_procinfo.endprologue_ai) then
+          current_procinfo.aktproccode.insertlistafter(current_procinfo.endprologue_ai,templist)
+        else
+          list.concatlist(templist);
+        templist.free;
       end;
 
 
@@ -1697,35 +1830,76 @@ implementation
         ref: treference;
         sr, highestsetsr: tsuperregister;
         pairreg: tregister;
+        i,
         regcount: longint;
+        aiarr : array of tai;
       begin
         reference_reset_base(ref,NR_SP,16,ctempposinvalid,16,[]);
         ref.addressmode:=AM_POSTINDEXED;
-        { highest reg stored twice? }
         regcount:=0;
-        highestsetsr:=RS_NO;
-        for sr:=lowsr to highsr do
-          if sr in rg[rt].used_in_proc then
-            begin
-              inc(regcount);
-              highestsetsr:=sr;
-            end;
-        if odd(regcount) then
+        { due to SEH on Win64 we can only load consecutive registers and single
+          ones are done using LDR, so we need to handle this differently there }
+        if target_info.system=system_aarch64_win64 then
           begin
-            list.concat(taicpu.op_reg_ref(A_LDR,newreg(rt,highestsetsr,sub),ref));
-            highestsetsr:=pred(highestsetsr);
-          end;
-        { load all (other) used registers pairwise }
-        pairreg:=NR_NO;
-        for sr:=highestsetsr downto lowsr do
-          if sr in rg[rt].used_in_proc then
-            if pairreg=NR_NO then
-              pairreg:=newreg(rt,sr,sub)
-            else
+            setlength(aiarr,highsr-lowsr+1);
+            pairreg:=NR_NO;
+            for sr:=lowsr to highsr do
+              if sr in rg[rt].used_in_proc then
+                begin
+                  if pairreg=NR_NO then
+                    pairreg:=newreg(rt,sr,sub)
+                  else
+                    begin
+                      if getsupreg(pairreg)=sr-1 then
+                        begin
+                          aiarr[regcount]:=taicpu.op_reg_reg_ref(A_LDP,pairreg,newreg(rt,sr,sub),ref);
+                          inc(regcount);
+                          pairreg:=NR_NO;
+                        end
+                      else
+                        begin
+                          aiarr[regcount]:=taicpu.op_reg_ref(A_LDR,pairreg,ref);
+                          inc(regcount);
+                          pairreg:=newreg(rt,sr,sub);
+                        end;
+                    end;
+                end;
+            if pairreg<>NR_NO then
+              begin
+                aiarr[regcount]:=taicpu.op_reg_ref(A_LDR,pairreg,ref);
+                inc(regcount);
+                pairreg:=NR_NO;
+              end;
+            for i:=regcount-1 downto 0 do
+              list.concat(aiarr[i]);
+          end
+        else
+          begin
+            { highest reg stored twice? }
+            highestsetsr:=RS_NO;
+            for sr:=lowsr to highsr do
+              if sr in rg[rt].used_in_proc then
+                begin
+                  inc(regcount);
+                  highestsetsr:=sr;
+                end;
+            if odd(regcount) then
               begin
-                list.concat(taicpu.op_reg_reg_ref(A_LDP,newreg(rt,sr,sub),pairreg,ref));
-                pairreg:=NR_NO
+                list.concat(taicpu.op_reg_ref(A_LDR,newreg(rt,highestsetsr,sub),ref));
+                highestsetsr:=pred(highestsetsr);
               end;
+            { load all (other) used registers pairwise }
+            pairreg:=NR_NO;
+            for sr:=highestsetsr downto lowsr do
+              if sr in rg[rt].used_in_proc then
+                if pairreg=NR_NO then
+                  pairreg:=newreg(rt,sr,sub)
+                else
+                  begin
+                    list.concat(taicpu.op_reg_reg_ref(A_LDP,newreg(rt,sr,sub),pairreg,ref));
+                    pairreg:=NR_NO
+                  end;
+          end;
         { There can't be any register left }
         if pairreg<>NR_NO then
           internalerror(2014112602);
@@ -1739,7 +1913,14 @@ implementation
         regsstored: boolean;
         sr: tsuperregister;
       begin
-        if not nostackframe then
+        if not(nostackframe) and
+          { we do not need an exit stack frame when we never return
+
+            * the final ret is left so the peephole optimizer can easily do call/ret -> jmp or call conversions
+            * the entry stack frame must be normally generated because the subroutine could be still left by
+              an exception and then the unwinding code might need to restore the registers stored by the entry code
+          }
+          not(po_noreturn in current_procinfo.procdef.procoptions) then
           begin
             { if no registers have been stored, we don't have to subtract the
               allocated temp space from the stack pointer }
@@ -1777,6 +1958,11 @@ implementation
 
         { return }
         list.concat(taicpu.op_none(A_RET));
+        if (pi_has_unwind_info in current_procinfo.flags) then
+          begin
+            tcpuprocinfo(current_procinfo).dump_scopes(list);
+            list.concat(cai_seh_directive.create(ash_endproc));
+          end;
       end;
 
 
@@ -1797,9 +1983,9 @@ implementation
         paraloc1.init;
         paraloc2.init;
         paraloc3.init;
-        paramanager.getintparaloc(list,pd,1,paraloc1);
-        paramanager.getintparaloc(list,pd,2,paraloc2);
-        paramanager.getintparaloc(list,pd,3,paraloc3);
+        paramanager.getcgtempparaloc(list,pd,1,paraloc1);
+        paramanager.getcgtempparaloc(list,pd,2,paraloc2);
+        paramanager.getcgtempparaloc(list,pd,3,paraloc3);
         a_load_const_cgpara(list,OS_SINT,len,paraloc3);
         a_loadaddr_ref_cgpara(list,dest,paraloc2);
         a_loadaddr_ref_cgpara(list,source,paraloc1);
@@ -2256,6 +2442,52 @@ implementation
       end;
 
 
+    { Emits an inline check of the floating point status register and a call
+      to FPC_THROWFPUEXCEPTION when one of the tested status bits is set.
+
+      Code is only generated when cs_check_fpu_exceptions is active in the
+      current local switches and either force is true or the current
+      procedure is flagged as needing a check (FPUExceptionCheckNeeded).
+
+      list  - assembler list the check is appended to
+      force - emit the check even if FPUExceptionCheckNeeded is not set
+      clear - reset FPUExceptionCheckNeeded after the check has been emitted
+
+      Emitted sequence:
+          mrs  r, fpsr
+          tst  r, #$1f
+          b.ne l1
+          tst  r, #$80
+          b.eq l2
+        l1:
+          call FPC_THROWFPUEXCEPTION
+        l2:
+    }
+    procedure tcgaarch64.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
+      var
+        r : TRegister;
+        ai: taicpu;
+        l1,l2: TAsmLabel;
+      begin
+        { so far, we assume all flavours of AArch64 need explicit floating point exception checking }
+        if ((cs_check_fpu_exceptions in current_settings.localswitches) and
+            (force or current_procinfo.FPUExceptionCheckNeeded)) then
+          begin
+            { read the status register into a scratch integer register }
+            r:=getintregister(list,OS_INT);
+            list.concat(taicpu.op_reg_reg(A_MRS,r,NR_FPSR));
+            { test bits 0..4 of FPSR; in the Arm FPSR layout these are the
+              cumulative IOC/DZC/OFC/UFC/IXC exception flags (see the Arm
+              Architecture Reference Manual) }
+            list.concat(taicpu.op_reg_const(A_TST,r,$1f));
+            current_asmdata.getjumplabel(l1);
+            current_asmdata.getjumplabel(l2);
+            { any of those bits set -> jump to the throw path at l1 }
+            ai:=taicpu.op_sym(A_B,l1);
+            ai.is_jmp:=true;
+            ai.condition:=C_NE;
+            list.concat(ai);
+            { test bit 7 of FPSR (IDC, input denormal, in the Arm FPSR layout) }
+            list.concat(taicpu.op_reg_const(A_TST,r,$80));
+            { bit 7 clear as well -> nothing pending, skip the call }
+            ai:=taicpu.op_sym(A_B,l2);
+            ai.is_jmp:=true;
+            ai.condition:=C_EQ;
+            list.concat(ai);
+            a_label(list,l1);
+            { the helper is called with the default calling convention, so
+              the volatile integer registers are marked as allocated around
+              the call }
+            alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+            cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
+            dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+            a_label(list,l2);
+            if clear then
+              current_procinfo.FPUExceptionCheckNeeded:=false;
+          end;
+      end;
+
+
+    { Emits the profiling hook for the current routine.
+
+      Only aarch64-linux is handled: X30 (the link register) is copied into
+      X0 and _mcount is called. Any other target triggers
+      internalerror(2020021901). }
+    procedure tcgaarch64.g_profilecode(list : TAsmList);
+      begin
+        if target_info.system = system_aarch64_linux then
+          begin
+            { X0 := X30, i.e. the link register value is handed to _mcount in X0 }
+            list.concat(taicpu.op_reg_reg(A_MOV,NR_X0,NR_X30));
+            a_call_name(list,'_mcount',false);
+          end
+        else
+          internalerror(2020021901);
+      end;
+
 
     procedure create_codegen;
       begin

+ 58 - 5
compiler/aarch64/cpubase.pas

@@ -49,6 +49,8 @@ unit cpubase;
       TAsmOp= {$i a64op.inc}
 
       TAsmOps = set of TAsmOp;
+      { AArch64 has fewer than 256 opcodes so far }
+      TCommonAsmOps = Set of TAsmOp;
 
       { This should define the array of instructions as string }
       op2strtable=array[tasmop] of string[11];
@@ -106,7 +108,7 @@ unit cpubase;
       std_param_align = 8;
 
       { TODO: Calculate bsstart}
-      regnumber_count_bsstart = 128;
+      regnumber_count_bsstart = 256;
 
       regnumber_table : array[tregisterindex] of tregister = (
         {$i ra64num.inc}
@@ -123,9 +125,6 @@ unit cpubase;
       VOLATILE_INTREGISTERS = [RS_X0..RS_X18,RS_X30];
       VOLATILE_MMREGISTERS =  [RS_D0..RS_D7,RS_D16..RS_D31];
 
-    type
-      totherregisterset = set of tregisterindex;
-
 {*****************************************************************************
                           Instruction post fixes
 *****************************************************************************}
@@ -202,7 +201,7 @@ unit cpubase;
       tshiftmode = (SM_None,
                     { shifted register instructions. LSL can also be used for
                       the index register of certain loads/stores }
-                    SM_LSL,SM_LSR,SM_ASR,
+                    SM_LSL,SM_LSR,SM_ASR,SM_ROR,
                     { extended register instructions: zero/sign extension +
                         optional shift (interpreted as LSL after extension)
                        -- the index register of certain loads/stores can be
@@ -324,6 +323,9 @@ unit cpubase;
     function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function conditions_equal(const c1, c2: TAsmCond): boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal") }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+
     procedure shifterop_reset(var so : tshifterop); {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
     function dwarf_reg(r:tregister):shortint;
@@ -331,6 +333,7 @@ unit cpubase;
     function eh_return_data_regno(nr: longint): longint;
 
     function is_shifter_const(d: aint; size: tcgsize): boolean;
+    function IsFloatImmediate(ft : tfloattype;value : bestreal) : boolean;
 
 
   implementation
@@ -490,6 +493,26 @@ unit cpubase;
       end;
 
 
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal"),
+      i.e. whether c is guaranteed to hold whenever Subset holds.
+
+      Returns True when c is C_None, when both conditions are equal, or for
+      one of the explicitly listed pairs:
+        EQ is contained in GE and LE
+        LT is contained in LE
+        GT is contained in GE
+      Every other combination returns False; only the signed conditions are
+      covered by the table.
+
+      NOTE(review): "EQ is contained in GE" holds for flags produced by a
+      compare/subtract; presumably callers only apply this to such flags -
+      confirm. }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+      begin
+        { trivial cases: no condition at all, or identical conditions }
+        Result := (c = C_None) or conditions_equal(Subset, c);
+
+        { Please update as necessary. [Kit] }
+        if not Result then
+          case Subset of
+            C_EQ:
+              Result := (c in [C_GE, C_LE]);
+            C_LT:
+              Result := (c in [C_LE]);
+            C_GT:
+              Result := (c in [C_GE]);
+            else
+              Result := False;
+          end;
+      end;
+
+
     function dwarf_reg(r:tregister):shortint;
       begin
         result:=regdwarf_table[findreg_by_number(r)];
@@ -619,4 +642,34 @@ unit cpubase;
         result:=-1;
     end;
 
+
+  { Returns True when value, stored as the float type ft (s32real or
+    s64real), has a bit pattern that passes the mask tests below: all low
+    mantissa bits are zero and the upper exponent bits have one of two fixed
+    shapes. Any other float type yields False.
+
+    NOTE(review): the accepted patterns look like the values encodable as
+    the 8 bit floating point immediate of FMOV - confirm against the callers
+    and the Arm Architecture Reference Manual. }
+  function IsFloatImmediate(ft : tfloattype;value : bestreal) : boolean;
+    var
+      singlerec : tcompsinglerec;
+      doublerec : tcompdoublerec;
+    begin
+      Result:=false;
+      case ft of
+        s32real:
+          begin
+            singlerec.value:=value;
+            { normalise to little endian so that bytes[0] is the least
+              significant byte regardless of the host byte order }
+            singlerec:=tcompsinglerec(NtoLE(DWord(singlerec)));
+            { bits 0..18 must be zero; bits 25..30 must be either 100000
+              (only bit 30 set) or 011111 (bits 25..29 set, bit 30 clear).
+              Bit 31 and bits 19..24 are not constrained. }
+            Result:=(singlerec.bytes[0]=0) and (singlerec.bytes[1]=0) and ((singlerec.bytes[2] and 7)=0)  and
+              (((singlerec.bytes[3] and $7e)=$40) or ((singlerec.bytes[3] and $7e)=$3e));
+          end;
+        s64real:
+          begin
+            doublerec.value:=value;
+            { same little endian normalisation as for the single case }
+            doublerec:=tcompdoublerec(NtoLE(QWord(doublerec)));
+            { bits 0..47 must be zero; bits 54..62 must be either
+              100000000 (only bit 62 set) or 011111111 (bits 54..61 set,
+              bit 62 clear). Bit 63 and bits 48..53 are not constrained. }
+            Result:=(doublerec.bytes[0]=0) and (doublerec.bytes[1]=0) and (doublerec.bytes[2]=0) and
+                    (doublerec.bytes[3]=0) and (doublerec.bytes[4]=0) and (doublerec.bytes[5]=0) and
+                    ((((doublerec.bytes[6] and $c0)=$0) and ((doublerec.bytes[7] and $7f)=$40)) or
+                     (((doublerec.bytes[6] and $c0)=$c0) and ((doublerec.bytes[7] and $7f)=$3f)));
+          end;
+        else
+          { other float types: Result stays False }
+          ;
+      end;
+    end;
+
+
 end.

+ 2 - 2
compiler/aarch64/cpuinfo.pas

@@ -112,12 +112,12 @@ Const
                                  genericlevel3optimizerswitches-
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
-                                 [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,
+                                 [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_loopunroll,cs_opt_tailrecursion,
 				  cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
-     [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
+     [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
 

+ 1 - 1
compiler/aarch64/cpunode.pas

@@ -35,7 +35,7 @@ implementation
     symcpu,
     aasmdef,
 {$ifndef llvm}
-    ncpuadd,ncpumat,ncpumem,ncpuinl,ncpucnv,ncpuset
+    ncpuadd,ncpumat,ncpumem,ncpuinl,ncpucnv,ncpuset,ncpucon,ncpuflw
 {$else llvm}
     llvmnode
 {$endif llvm}

+ 43 - 34
compiler/aarch64/cpupara.pas

@@ -280,6 +280,7 @@ unit cpupara;
     function  tcpuparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;
       var
         retcgsize: tcgsize;
+        otherside: tcallercallee;
       begin
          if set_common_funcretloc_info(p,forcetempdef,retcgsize,result) then
            exit;
@@ -287,11 +288,18 @@ unit cpupara;
          { in this case, it must be returned in registers as if it were passed
            as the first parameter }
          init_para_alloc_values;
-         alloc_para(result,p,vs_value,side,result.def,false,false);
+         { if we're on the callee side, filling the result location is actually the "callerside"
+          as far as passing it as a parameter value is concerned }
+         if side=callerside then
+           otherside:=calleeside
+         else
+           otherside:=callerside;
+         alloc_para(result,p,vs_value,otherside,result.def,false,false);
          { sanity check (LOC_VOID for empty records) }
          if not assigned(result.location) or
             not(result.location^.loc in [LOC_REGISTER,LOC_MMREGISTER,LOC_VOID]) then
            internalerror(2014113001);
+{$ifndef llvm}
          {
            According to ARM64 ABI: "If the size of the argument is less than 8 bytes then
            the size of the argument is set to 8 bytes. The effect is as if the argument
@@ -310,6 +318,7 @@ unit cpupara;
              result.location^.size:=OS_64;
              result.location^.def:=u64inttype;
            end;
+{$endif}
       end;
 
 
@@ -374,7 +383,7 @@ unit cpupara;
             else
               paralen:=tcgsize2size[def_cgsize(paradef)];
             loc:=getparaloc(p.proccalloption,paradef);
-            if (paradef.typ in [objectdef,arraydef,recorddef]) and
+            if (paradef.typ in [objectdef,arraydef,recorddef,setdef]) and
                not is_special_array(paradef) and
                (varspez in [vs_value,vs_const]) then
               paracgsize:=int_cgsize(paralen)
@@ -486,47 +495,45 @@ unit cpupara;
              end
            else
              begin
-{$ifndef llvm}
                paraloc^.size:=locsize;
                paraloc^.def:=locdef;
-{$else llvm}
-               case locsize of
-                 OS_8,OS_16,OS_32:
-                   begin
-                     paraloc^.size:=OS_64;
-                     paraloc^.def:=u64inttype;
-                   end;
-                 OS_S8,OS_S16,OS_S32:
-                   begin
-                     paraloc^.size:=OS_S64;
-                     paraloc^.def:=s64inttype;
-                   end;
-                 OS_F32:
-                   begin
-                     paraloc^.size:=OS_F32;
-                     paraloc^.def:=s32floattype;
-                   end;
-                 OS_F64:
-                   begin
-                     paraloc^.size:=OS_F64;
-                     paraloc^.def:=s64floattype;
-                   end;
-                 else
-                   begin
-                     if is_record(locdef) or
-                        ((locdef.typ=arraydef) and
-                         not is_special_array(locdef)) then
+{$ifdef llvm}
+               if not is_ordinal(paradef) then
+                 begin
+                   case locsize of
+                     OS_8,OS_16,OS_32:
                        begin
                          paraloc^.size:=OS_64;
                          paraloc^.def:=u64inttype;
-                       end
+                       end;
+                     OS_S8,OS_S16,OS_S32:
+                       begin
+                         paraloc^.size:=OS_S64;
+                         paraloc^.def:=s64inttype;
+                       end;
+                     OS_F32:
+                       begin
+                         paraloc^.size:=OS_F32;
+                         paraloc^.def:=s32floattype;
+                       end;
+                     OS_F64:
+                       begin
+                         paraloc^.size:=OS_F64;
+                         paraloc^.def:=s64floattype;
+                       end;
                      else
                        begin
-                         paraloc^.size:=locsize;
-                         paraloc^.def:=locdef;
+                         if is_record(locdef) or
+                            is_set(locdef) or
+                            ((locdef.typ=arraydef) and
+                             not is_special_array(locdef)) then
+                           begin
+                             paraloc^.size:=OS_64;
+                             paraloc^.def:=u64inttype;
+                           end
                        end;
                    end;
-               end;
+                 end;
 {$endif llvm}
              end;
 
@@ -559,6 +566,7 @@ unit cpupara;
                              paraloc^.def:=u32inttype;
                            end;
                        end
+{$ifndef llvm}
                      else
                        begin
                          if side=calleeside then
@@ -567,6 +575,7 @@ unit cpupara;
                              paraloc^.def:=u32inttype;
                            end;
                        end;
+{$endif llvm}
                    end;
 
                  { in case it's a composite, "The argument is passed as though

+ 81 - 2
compiler/aarch64/cpupi.pas

@@ -27,19 +27,38 @@ interface
 
   uses
     procinfo,
-    psub;
+    psub,
+    aasmdata,aasmbase;
 
   type
     tcpuprocinfo=class(tcgprocinfo)
+    private
+      scopes: TAsmList; { scope records collected by add_finally_scope/add_except_scope; created on first use, emitted by dump_scopes }
+      scopecount: longint; { number of scope records added so far; dump_scopes emits nothing while this is 0 }
+      unwindflags: byte; { bit mask accumulated by the add_*_scope methods and copied into the handler directive's flags by dump_scopes }
+    public
       constructor create(aparent: tprocinfo); override;
+      destructor destroy; override;
       procedure set_first_temp_offset; override;
+      procedure add_finally_scope(startlabel,endlabel,handler:TAsmSymbol;implicit:Boolean);
+      procedure add_except_scope(trylabel,exceptlabel,endlabel,filter:TAsmSymbol);
+      procedure dump_scopes(list:tasmlist);
     end;
 
 implementation
 
   uses
+    cutils,
+    fmodule,
+    symtable,
     tgobj,
-    cpubase;
+    cpubase,
+    aasmtai;
+
+  const
+    SCOPE_FINALLY=0; { first word of a scope record added by add_finally_scope with implicit=false }
+    SCOPE_CATCHALL=1; { first word of a scope record added by add_except_scope when no filter symbol is given }
+    SCOPE_IMPLICIT=2; { first word of a scope record added by add_finally_scope with implicit=true }
 
   constructor tcpuprocinfo.create(aparent: tprocinfo);
     begin
@@ -56,12 +75,72 @@ implementation
       framepointer:=NR_STACK_POINTER_REG;
     end;
 
+  destructor tcpuprocinfo.destroy;
+    begin
+      { scopes is only allocated once a scope has been added, so it may
+        still be nil here; Free copes with that }
+      scopes.Free;
+      inherited;
+    end;
+
   procedure tcpuprocinfo.set_first_temp_offset;
     begin
      { leave room for allocated parameters: the first temp offset is
        maxpushedparasize aligned to 16 }
      tg.setfirsttemp(align(maxpushedparasize,16));
     end;
 
+  procedure tcpuprocinfo.add_finally_scope(startlabel,endlabel,handler:TAsmSymbol;implicit:Boolean);
+    var
+      kind: longint;
+    begin
+      { every finally scope sets flag bit 2; an implicit one also needs
+        catch functionality and therefore sets bit 1 as well }
+      if implicit then
+        begin
+          unwindflags:=unwindflags or 3;
+          kind:=SCOPE_IMPLICIT;
+        end
+      else
+        begin
+          unwindflags:=unwindflags or 2;
+          kind:=SCOPE_FINALLY;
+        end;
+      inc(scopecount);
+      if not assigned(scopes) then
+        scopes:=TAsmList.Create;
+
+      { one record: scope kind, then start, end and handler symbols }
+      scopes.concat(tai_const.create_32bit(kind));
+      scopes.concat(tai_const.create_rva_sym(startlabel));
+      scopes.concat(tai_const.create_rva_sym(endlabel));
+      scopes.concat(tai_const.create_rva_sym(handler));
+    end;
+
+  procedure tcpuprocinfo.add_except_scope(trylabel,exceptlabel,endlabel,filter:TAsmSymbol);
+    var
+      head: tai_const;
+    begin
+      unwindflags:=unwindflags or 3;
+      inc(scopecount);
+      if not assigned(scopes) then
+        scopes:=TAsmList.Create;
+
+      { the record starts with the filter symbol, or with SCOPE_CATCHALL
+        when the caller passes no filter }
+      if not Assigned(filter) then
+        head:=tai_const.create_32bit(SCOPE_CATCHALL)
+      else
+        head:=tai_const.create_rva_sym(filter);
+      scopes.concat(head);
+      scopes.concat(tai_const.create_rva_sym(trylabel));
+      scopes.concat(tai_const.create_rva_sym(exceptlabel));
+      scopes.concat(tai_const.create_rva_sym(endlabel));
+    end;
+
+  procedure tcpuprocinfo.dump_scopes(list: tasmlist);
+    const
+      handlername='__FPC_specific_handler';
+    var
+      handlerdir: tai_seh_directive;
+    begin
+      { nothing to emit unless at least one scope was added }
+      if scopecount<>0 then
+        begin
+          handlerdir:=cai_seh_directive.create_name(ash_handler,handlername);
+          handlerdir.data.flags:=unwindflags;
+          { outside the system unit the handler has to be declared external }
+          if not systemunit.iscurrentunit then
+            current_module.add_extern_asmsym(handlername,AB_EXTERNAL,AT_FUNCTION);
+          list.concat(handlerdir);
+          list.concat(cai_seh_directive.create(ash_handlerdata));
+          inc(list.section_count);
+          { handler data: the number of scopes followed by the scope records }
+          list.concat(tai_const.create_32bit(scopecount));
+          list.concatlist(scopes);
+          { return to text, required for GAS compatibility }
+          { This creates a tai_align which is redundant here (although harmless) }
+          new_section(list,sec_code,lower(procdef.mangledname),0);
+        end;
+    end;
+
 
 begin
   cprocinfo:=tcpuprocinfo;

+ 5 - 2
compiler/aarch64/cputarg.pas

@@ -38,12 +38,15 @@ implementation
     {$ifndef NOTARGETLINUX}
       ,t_linux
     {$endif}
-    {$ifndef NOTARGETBSD}
-      ,t_bsd
+    {$ifndef NOTARGETDARWIN}
+      ,t_darwin
     {$endif}
     {$ifndef NOTARGETANDROID}
       ,t_android
     {$endif}
+    {$ifndef NOTARGETWIN64}
+      ,t_win
+    {$endif}
 
 {**************************************
              Assemblers

+ 9 - 0
compiler/aarch64/ncpuadd.pas

@@ -34,6 +34,7 @@ interface
           function  GetResFlags(unsigned:Boolean):TResFlags;
           function  GetFPUResFlags:TResFlags;
        protected
+          function use_fma : boolean;override;
           procedure second_addfloat;override;
           procedure second_cmpfloat;override;
           procedure second_cmpboolean;override;
@@ -62,6 +63,12 @@ interface
                                taarch64addnode
 *****************************************************************************}
 
+    function taarch64addnode.use_fma : boolean;
+      begin
+        { this override answers true unconditionally }
+        use_fma:=true;
+      end;
+
+
     function taarch64addnode.GetResFlags(unsigned:Boolean):TResFlags;
       begin
         case NodeType of
@@ -211,6 +218,7 @@ interface
 
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,
            location.register,left.location.register,right.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
 
 
@@ -231,6 +239,7 @@ interface
         { signalling compare so we can get exceptions }
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCMPE,
              left.location.register,right.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
 
 

+ 90 - 0
compiler/aarch64/ncpucon.pas

@@ -0,0 +1,90 @@
+{
+    Copyright (c) 2005 by Florian Klaempfl
+
+    Code generation for const nodes on the AArch64
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpucon;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      node,ncgcon,cpubase;
+
+    type
+      taarch64realconstnode = class(tcgrealconstnode) { real constant node with AArch64 special cases; installed as crealconstnode by this unit }
+        function pass_1 : tnode;override; { expects LOC_MMREGISTER when the value passes IsFloatImmediate, otherwise defers to the inherited pass_1 }
+        procedure pass_generate_code;override; { FMOV for immediates, EOR for an all-zero bit pattern, inherited code otherwise }
+      end;
+
+  implementation
+
+    uses
+      verbose,
+      globtype,globals,
+      cpuinfo,
+      aasmbase,aasmtai,aasmdata,aasmcpu,
+      symdef,
+      defutil,
+      cgbase,cgutils,cgobj,
+      procinfo,
+      ncon;
+
+{*****************************************************************************
+                         TAARCH64REALCONSTNODE
+*****************************************************************************}
+
+    function taarch64realconstnode.pass_1 : tnode;
+      begin
+        { only constants that pass IsFloatImmediate are handled here;
+          everything else takes the inherited path }
+        if not IsFloatImmediate(tfloatdef(resultdef).floattype,value_real) then
+          result:=inherited pass_1
+        else
+          begin
+            result:=nil;
+            expectloc:=LOC_MMREGISTER;
+          end;
+      end;
+
+
+    procedure taarch64realconstnode.pass_generate_code;
+      var
+        vecreg : TRegister;
+        asimmediate : boolean;
+      begin
+        asimmediate:=IsFloatImmediate(tfloatdef(resultdef).floattype,value_real);
+        { cast and compare the bit pattern as we cannot handle -0.0 }
+        if not asimmediate and (bestrealrec(value_real).Data<>0) then
+          begin
+            { neither an immediate nor an all-zero bit pattern }
+            inherited pass_generate_code;
+            exit;
+          end;
+
+        { both special cases produce the value in a fresh MM register }
+        location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+        if asimmediate then
+          current_asmdata.CurrAsmList.concat(taicpu.op_reg_realconst(A_FMOV,
+            location.register,value_real))
+        else
+          begin
+            { all bits zero: EOR the R_SUBMM16B view of the register with itself }
+            vecreg:=newreg(R_MMREGISTER,getsupreg(location.register),R_SUBMM16B);
+            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_EOR,
+              vecreg,vecreg,vecreg));
+          end;
+      end;
+
+begin
+  crealconstnode:=taarch64realconstnode; { install the AArch64 class in the crealconstnode variable }
+end.

+ 564 - 0
compiler/aarch64/ncpuflw.pas

@@ -0,0 +1,564 @@
+{
+    Copyright (c) 2011-2020 by Free Pascal development team
+
+    Generate Win64-specific exception handling code (based on x86_64 code)
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpuflw;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    node,nflw,ncgflw,psub;
+
+  type
+    taarch64raisenode=class(tcgraisenode)
+      function pass_1 : tnode;override; { on aarch64-win64 a raise without operand becomes a call to fpc_reraise }
+    end;
+
+    taarch64onnode=class(tcgonnode)
+      procedure pass_generate_code;override; { Win64 variant; other targets use the inherited code }
+    end;
+
+    taarch64tryexceptnode=class(tcgtryexceptnode)
+      procedure pass_generate_code;override; { Win64 variant; other targets use the inherited code }
+    end;
+
+    taarch64tryfinallynode=class(tcgtryfinallynode)
+      finalizepi: tcgprocinfo; { procinfo obtained from create_for_outlining('$fin$',...); its procdef is the handler symbol of the finally scope. Not created by create for generic procedures }
+      constructor create(l,r:TNode);override;
+      constructor create_implicit(l,r:TNode);override;
+      function simplify(forinline: boolean): tnode;override;
+      procedure pass_generate_code;override;
+      function dogetcopy:tnode;override;
+    end;
+
+implementation
+
+  uses
+    globtype,globals,verbose,systems,fmodule,
+    nbas,ncal,nutils,
+    symconst,symsym,symdef,
+    cgbase,cgobj,cgutils,tgobj,
+    cpubase,htypechk,
+    pass_1,pass_2,
+    aasmbase,aasmtai,aasmdata,aasmcpu,procinfo,cpupi;
+
+  var
+    endexceptlabel: tasmlabel; { end label of the try..except being generated: set, saved and restored by taarch64tryexceptnode.pass_generate_code, jumped to by taarch64onnode.pass_generate_code }
+
+
+{ taarch64raisenode }
+
+function taarch64raisenode.pass_1 : tnode;
+  var
+    stmts : tstatementnode;
+    reraisecall : tcallnode;
+  begin
+    { difference from generic code is that address stack is not popped on reraise }
+    if (target_info.system=system_aarch64_win64) and not assigned(left) then
+      begin
+        { a raise without operand on Win64: replace it by a call to
+          fpc_reraise, flagged as never returning }
+        result:=internalstatements(stmts);
+        reraisecall:=ccallnode.createintern('fpc_reraise',nil);
+        include(reraisecall.callnodeflags,cnf_call_never_returns);
+        addstatement(stmts,reraisecall);
+      end
+    else
+      result:=inherited pass_1;
+end;
+
+{ taarch64onnode }
+
+procedure taarch64onnode.pass_generate_code; { Generates one "on" handler for
+    aarch64-win64: stores the exception object into the handler's exception
+    variable (if there is one), generates the handler body, calls
+    FPC_DONEEXCEPTION and jumps to endexceptlabel. Other targets use the
+    inherited implementation. }
+  var
+    exceptvarsym : tlocalvarsym;
+  begin
+    if (target_info.system<>system_aarch64_win64) then
+      begin
+        inherited pass_generate_code;
+        exit;
+      end;
+
+    location_reset(location,LOC_VOID,OS_NO);
+
+    { RTL will put exceptobject into X0 when jumping here }
+    cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
+
+    { Retrieve exception variable: the first symbol of excepTSymtable,
+      or nil when there is no such symtable }
+    if assigned(excepTSymtable) then
+      exceptvarsym:=tlocalvarsym(excepTSymtable.SymList[0])
+    else
+      exceptvarsym:=nil;
+
+    if assigned(exceptvarsym) then
+      begin
+        { give the variable a pointer-sized local and store the result
+          register into it }
+        exceptvarsym.localloc.loc:=LOC_REFERENCE;
+        exceptvarsym.localloc.size:=OS_ADDR;
+        tg.GetLocal(current_asmdata.CurrAsmList,sizeof(pint),voidpointertype,exceptvarsym.localloc.reference);
+        cg.a_load_reg_ref(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,NR_FUNCTION_RESULT_REG,exceptvarsym.localloc.reference);
+      end;
+    cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
+
+    if assigned(right) then
+      secondpass(right);
+
+    { deallocate exception symbol }
+    if assigned(exceptvarsym) then
+      begin
+        tg.UngetLocal(current_asmdata.CurrAsmList,exceptvarsym.localloc.reference);
+        exceptvarsym.localloc.loc:=LOC_INVALID;
+      end;
+    cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
+    cg.a_jmp_always(current_asmdata.CurrAsmList,endexceptlabel);
+  end;
+
+{ taarch64tryfinallynode }
+
+function reset_regvars(var n: tnode; arg: pointer): foreachnoderesult;
+  begin
+    { node callback: arg is the procinfo whose flags are updated }
+    result:=fen_true;
+    if n.nodetype=temprefn then
+      make_not_regable(n,[])
+    else if n.nodetype=calln then
+      include(tprocinfo(arg).flags,pi_do_call);
+  end;
+
+function copy_parasize(var n: tnode; arg: pointer): foreachnoderesult;
+  begin
+    { node callback: for every call node, hand its pushed_parasize to the
+      procinfo passed in arg }
+    if n.nodetype=calln then
+      tcgprocinfo(arg).allocate_push_parasize(tcallnode(n).pushed_parasize);
+    result:=fen_true;
+  end;
+
+constructor taarch64tryfinallynode.create(l, r: TNode);
+  begin
+    inherited create(l,r);
+    if (target_info.system<>system_aarch64_win64) then
+      exit;
+    { Don't create child procedures for generic methods, their nested-like
+      behavior causes compilation errors because real nested procedures
+      aren't allowed for generics. Not creating them doesn't harm because
+      generic node tree is discarded without generating code. }
+    if df_generic in current_procinfo.procdef.defoptions then
+      exit;
+
+    finalizepi:=tcgprocinfo(current_procinfo.create_for_outlining('$fin$',current_procinfo.procdef.struct,potype_exceptfilter,voidtype,r));
+    { the init/final code is messing with asm nodes, so inform the compiler about this }
+    include(finalizepi.flags,pi_has_assembler_block);
+    { Regvar optimization for symbols is suppressed when using exceptions, but
+      temps may be still placed into registers. This must be fixed. }
+    foreachnodestatic(r,@reset_regvars,finalizepi);
+  end;
+
+constructor taarch64tryfinallynode.create_implicit(l, r: TNode);
+  begin
+    inherited create_implicit(l, r);
+    if (target_info.system<>system_aarch64_win64) then
+      exit;
+
+    { implicit frames are not expected inside generic procedures }
+    if df_generic in current_procinfo.procdef.defoptions then
+      InternalError(2020033101);
+
+    finalizepi:=tcgprocinfo(current_procinfo.create_for_outlining('$fin$',current_procinfo.procdef.struct,potype_exceptfilter,voidtype,r));
+    { the init/final code is messing with asm nodes, so inform the compiler
+      about this; the outlined procedure is also marked as doing calls }
+    finalizepi.flags:=finalizepi.flags+[pi_do_call,pi_has_assembler_block];
+    finalizepi.allocate_push_parasize(32);
+  end;
+
+function taarch64tryfinallynode.simplify(forinline: boolean): tnode;
+  begin
+    result:=inherited simplify(forinline);
+    { only Win64 needs the outlined finalizer, and only when the inherited
+      simplification left the node in place }
+    if (target_info.system=system_aarch64_win64) and (result=nil) then
+      begin
+        { generate a copy of the code }
+        finalizepi.code:=right.getcopy;
+        foreachnodestatic(right,@copy_parasize,finalizepi);
+        { For implicit frames, no actual code is available at this time,
+          it is added later in assembler form. So store the nested procinfo
+          for later use. }
+        if implicitframe then
+          current_procinfo.finalize_procinfo:=finalizepi;
+      end;
+  end;
+
+procedure emit_nop;
+  var
+    anchor: TAsmLabel;
+  begin
+    { To avoid optimizing away the whole thing, prepend a jumplabel with increased refcount }
+    current_asmdata.getjumplabel(anchor);
+    anchor.increfs;
+    with current_asmdata do
+      begin
+        cg.a_label(CurrAsmList,anchor);
+        CurrAsmList.concat(Taicpu.op_none(A_NOP));
+      end;
+  end;
+
+procedure taarch64tryfinallynode.pass_generate_code; { Generates a
+    try..finally for aarch64-win64: a NOP and trylabel, the try block,
+    finallylabel/endtrylabel, the inline finalizer code and endfinallylabel;
+    finally it registers a finally scope (trylabel..endtrylabel, handler =
+    finalizepi's procdef) with the current tcpuprocinfo. Other targets use the
+    inherited implementation. }
+  var
+    trylabel,
+    endtrylabel,
+    finallylabel,
+    endfinallylabel,
+    templabel,
+    oldexitlabel: tasmlabel;
+    oldflowcontrol: tflowcontrol;
+    catch_frame: boolean;
+  begin
+    if (target_info.system<>system_aarch64_win64) then
+      begin
+        inherited pass_generate_code;
+        exit;
+      end;
+
+    location_reset(location,LOC_VOID,OS_NO);
+
+    { Do not generate a frame that catches exceptions if the only action
+      would be reraising it. Doing so is extremely inefficient with SEH
+      (in contrast with setjmp/longjmp exception handling).
+      Hence a catching frame is only used for implicit frames of safecall
+      procedures. }
+    catch_frame:=implicitframe and
+      (current_procinfo.procdef.proccalloption=pocall_safecall);
+
+    oldflowcontrol:=flowcontrol;
+    flowcontrol:=[fc_inflowcontrol];
+
+    templabel:=nil;
+    current_asmdata.getjumplabel(trylabel);
+    current_asmdata.getjumplabel(endtrylabel);
+    current_asmdata.getjumplabel(finallylabel);
+    current_asmdata.getjumplabel(endfinallylabel);
+    oldexitlabel:=current_procinfo.CurrExitLabel;
+    if implicitframe then
+      current_procinfo.CurrExitLabel:=finallylabel;
+
+    { Start of scope }
+    { Padding with NOP is necessary here because exceptions in called
+      procedures are seen at the next instruction, while CPU/OS exceptions
+      like AV are seen at the current instruction.
+
+      So in the following code
+
+      raise_some_exception;        //(a)
+      try
+        pchar(nil)^:='0';          //(b)
+        ...
+
+      without NOP, exceptions (a) and (b) will be seen at the same address
+      and fall into the same scope. However they should be seen in different scopes.
+    }
+
+    emit_nop;
+    cg.a_label(current_asmdata.CurrAsmList,trylabel);
+
+    { try code }
+    if assigned(left) then
+      begin
+        { fc_unwind_xx tells exit/continue/break statements to emit special
+          unwind code instead of just JMP }
+        if not implicitframe then
+          flowcontrol:=flowcontrol+[fc_catching_exceptions,fc_unwind_exit,fc_unwind_loop];
+        secondpass(left);
+        flowcontrol:=flowcontrol-[fc_catching_exceptions,fc_unwind_exit,fc_unwind_loop];
+        if codegenerror then
+          exit;
+      end;
+
+    { finallylabel is only used in implicit frames as an exit point from nested try..finally
+      statements, if any. To prevent finalizer from being executed twice, it must come before
+      endtrylabel (bug #34772) }
+    if catch_frame then
+      begin
+        current_asmdata.getjumplabel(templabel);
+        cg.a_label(current_asmdata.CurrAsmList, finallylabel);
+        { jump over exception handler }
+        cg.a_jmp_always(current_asmdata.CurrAsmList,templabel);
+        { Handle the except block first, so endtrylabel serves both
+          as end of scope and as unwind target. This way it is possible to
+          encode everything into a single scope record. }
+        cg.a_label(current_asmdata.CurrAsmList,endtrylabel);
+        if (current_procinfo.procdef.proccalloption=pocall_safecall) then
+          begin
+            handle_safecall_exception;
+            cg.a_jmp_always(current_asmdata.CurrAsmList,endfinallylabel);
+          end
+        else
+          InternalError(2014031601);
+        cg.a_label(current_asmdata.CurrAsmList,templabel);
+      end
+    else
+      begin
+        { same as emit_nop but using finallylabel instead of dummy }
+        cg.a_label(current_asmdata.CurrAsmList,finallylabel);
+        finallylabel.increfs;
+        current_asmdata.CurrAsmList.concat(Taicpu.op_none(A_NOP));
+        cg.a_label(current_asmdata.CurrAsmList,endtrylabel);
+      end;
+
+    flowcontrol:=[fc_inflowcontrol];
+    { store the tempflags so that we can generate a copy of the finally handler
+      later on }
+    if not implicitframe then
+      finalizepi.store_tempflags;
+    { generate the inline finalizer code }
+    secondpass(right);
+
+    if codegenerror then
+      exit;
+
+    { normal exit from safecall proc must zero the result register }
+    if implicitframe and (current_procinfo.procdef.proccalloption=pocall_safecall) then
+      cg.a_load_const_reg(current_asmdata.CurrAsmList,OS_INT,0,NR_FUNCTION_RESULT_REG);
+
+    cg.a_label(current_asmdata.CurrAsmList,endfinallylabel);
+
+    { generate the scope record in .xdata; catch_frame is passed as the
+      "implicit" argument of add_finally_scope }
+    tcpuprocinfo(current_procinfo).add_finally_scope(trylabel,endtrylabel,
+      current_asmdata.RefAsmSymbol(finalizepi.procdef.mangledname,AT_FUNCTION),catch_frame);
+
+    if implicitframe then
+      current_procinfo.CurrExitLabel:=oldexitlabel;
+    flowcontrol:=oldflowcontrol;
+  end;
+
+function taarch64tryfinallynode.dogetcopy: tnode;
+  var
+    dup : taarch64tryfinallynode;
+  begin
+    result:=inherited dogetcopy;
+    if (target_info.system<>system_aarch64_win64) then
+      exit;
+
+    if df_generic in current_procinfo.procdef.defoptions then
+      InternalError(2020033101);
+
+    { the copy gets an outlined finalizer procinfo of its own, built from
+      the copy's right subtree }
+    dup:=taarch64tryfinallynode(result);
+    dup.finalizepi:=tcgprocinfo(current_procinfo.create_for_outlining('$fin$',current_procinfo.procdef.struct,potype_exceptfilter,voidtype,dup.right));
+    { carry pi_do_call over from this node's finalizer procinfo }
+    if pi_do_call in finalizepi.flags then
+      include(dup.finalizepi.flags,pi_do_call);
+    { the init/final code is messing with asm nodes, so inform the compiler about this }
+    include(dup.finalizepi.flags,pi_has_assembler_block);
+    if implicitframe then
+      dup.finalizepi.allocate_push_parasize(32);
+  end;
+
+{ taarch64tryexceptnode }
+
+procedure taarch64tryexceptnode.pass_generate_code; { Generates a
+    try..except for aarch64-win64: the try block between trylabel and
+    exceptlabel, the "on" handlers together with a filter table placed in
+    aktlocaldata, the optional else block (t1), exit/break/continue stubs
+    that call FPC_DONEEXCEPTION, and finally an except scope registered
+    with the current tcpuprocinfo. Other targets use the inherited
+    implementation. }
+  var
+    trylabel,
+    exceptlabel,oldendexceptlabel,
+    lastonlabel,
+    exitexceptlabel,
+    continueexceptlabel,
+    breakexceptlabel,
+    oldCurrExitLabel,
+    oldContinueLabel,
+    oldBreakLabel : tasmlabel;
+    onlabel,
+    filterlabel: tasmlabel;
+    oldflowcontrol,tryflowcontrol,
+    exceptflowcontrol : tflowcontrol;
+    hnode : tnode;
+    hlist : tasmlist;
+    onnodecount : tai_const;
+    sym : tasmsymbol;
+  label
+    errorexit;
+  begin
+    if (target_info.system<>system_aarch64_win64) then
+      begin
+        inherited pass_generate_code;
+        exit;
+      end;
+    location_reset(location,LOC_VOID,OS_NO);
+
+    oldflowcontrol:=flowcontrol;
+    exceptflowcontrol:=[];
+    continueexceptlabel:=nil;
+    breakexceptlabel:=nil;
+
+    include(flowcontrol,fc_inflowcontrol);
+    { this can be called recursively, so the unit-level endexceptlabel is
+      saved here and restored at errorexit }
+    oldBreakLabel:=nil;
+    oldContinueLabel:=nil;
+    oldendexceptlabel:=endexceptlabel;
+
+    { save the old labels for control flow statements }
+    oldCurrExitLabel:=current_procinfo.CurrExitLabel;
+    current_asmdata.getjumplabel(exitexceptlabel);
+    if assigned(current_procinfo.CurrBreakLabel) then
+      begin
+        oldContinueLabel:=current_procinfo.CurrContinueLabel;
+        oldBreakLabel:=current_procinfo.CurrBreakLabel;
+        current_asmdata.getjumplabel(breakexceptlabel);
+        current_asmdata.getjumplabel(continueexceptlabel);
+      end;
+
+    current_asmdata.getjumplabel(exceptlabel);
+    current_asmdata.getjumplabel(endexceptlabel);
+    current_asmdata.getjumplabel(lastonlabel);
+    filterlabel:=nil;
+
+    { start of scope }
+    current_asmdata.getjumplabel(trylabel);
+    emit_nop;
+    cg.a_label(current_asmdata.CurrAsmList,trylabel);
+
+    { control flow in try block needs no special handling,
+      just make sure that target labels are outside the scope }
+    secondpass(left);
+    tryflowcontrol:=flowcontrol;
+    if codegenerror then
+      goto errorexit;
+
+    { jump over except handlers }
+    cg.a_jmp_always(current_asmdata.CurrAsmList,endexceptlabel);
+
+    { end of scope }
+    cg.a_label(current_asmdata.CurrAsmList,exceptlabel);
+
+    { set control flow labels for the except block }
+    { and the on statements                        }
+    current_procinfo.CurrExitLabel:=exitexceptlabel;
+    if assigned(oldBreakLabel) then
+      begin
+        current_procinfo.CurrContinueLabel:=continueexceptlabel;
+        current_procinfo.CurrBreakLabel:=breakexceptlabel;
+      end;
+
+    flowcontrol:=[fc_inflowcontrol];
+    { on statements }
+    if assigned(right) then
+      begin
+        { emit filter table to a temporary asmlist: a 32 bit entry count
+          (patched while iterating) followed by one pair per handler }
+        hlist:=TAsmList.Create;
+        current_asmdata.getaddrlabel(filterlabel);
+        new_section(hlist,sec_rodata_norel,filterlabel.name,4);
+        cg.a_label(hlist,filterlabel);
+        onnodecount:=tai_const.create_32bit(0);
+        hlist.concat(onnodecount);
+
+        hnode:=right;
+        while assigned(hnode) do
+          begin
+            if hnode.nodetype<>onn then
+              InternalError(2011103101);
+            current_asmdata.getjumplabel(onlabel);
+            sym:=current_asmdata.RefAsmSymbol(tonnode(hnode).excepttype.vmt_mangledname,AT_DATA,true);
+            hlist.concat(tai_const.create_rva_sym(sym));
+            hlist.concat(tai_const.create_rva_sym(onlabel));
+            current_module.add_extern_asmsym(sym);
+            cg.a_label(current_asmdata.CurrAsmList,onlabel);
+            secondpass(hnode);
+            inc(onnodecount.value);
+            hnode:=tonnode(hnode).left;
+          end;
+        { add 'else' node to the filter list, too }
+        if assigned(t1) then
+          begin
+            hlist.concat(tai_const.create_32bit(-1));
+            hlist.concat(tai_const.create_rva_sym(lastonlabel));
+            inc(onnodecount.value);
+          end;
+        { now move filter table to permanent list all at once }
+        current_procinfo.aktlocaldata.concatlist(hlist);
+        hlist.free;
+      end;
+
+    cg.a_label(current_asmdata.CurrAsmList,lastonlabel);
+    if assigned(t1) then
+      begin
+        { here we don't have to reset flowcontrol           }
+        { the default and on flowcontrols are handled equal }
+        secondpass(t1);
+        cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
+        if (flowcontrol*[fc_exit,fc_break,fc_continue]<>[]) then
+          cg.a_jmp_always(current_asmdata.CurrAsmList,endexceptlabel);
+      end;
+    exceptflowcontrol:=flowcontrol;
+
+    if fc_exit in exceptflowcontrol then
+      begin
+        { an exit inside the except handlers lands here: finish the
+          exception, then continue to the outer exit label (through a
+          local unwind when the enclosing code asked for fc_unwind_exit) }
+        cg.a_label(current_asmdata.CurrAsmList,exitexceptlabel);
+        cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
+        if (fc_unwind_exit in oldflowcontrol) then
+          cg.g_local_unwind(current_asmdata.CurrAsmList,oldCurrExitLabel)
+        else
+          cg.a_jmp_always(current_asmdata.CurrAsmList,oldCurrExitLabel);
+      end;
+
+    if fc_break in exceptflowcontrol then
+      begin
+        cg.a_label(current_asmdata.CurrAsmList,breakexceptlabel);
+        cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
+        if (fc_unwind_loop in oldflowcontrol) then
+          cg.g_local_unwind(current_asmdata.CurrAsmList,oldBreakLabel)
+        else
+          cg.a_jmp_always(current_asmdata.CurrAsmList,oldBreakLabel);
+      end;
+
+    if fc_continue in exceptflowcontrol then
+      begin
+        cg.a_label(current_asmdata.CurrAsmList,continueexceptlabel);
+        cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
+        if (fc_unwind_loop in oldflowcontrol) then
+          cg.g_local_unwind(current_asmdata.CurrAsmList,oldContinueLabel)
+        else
+          cg.a_jmp_always(current_asmdata.CurrAsmList,oldContinueLabel);
+      end;
+
+    emit_nop;
+    cg.a_label(current_asmdata.CurrAsmList,endexceptlabel);
+    tcpuprocinfo(current_procinfo).add_except_scope(trylabel,exceptlabel,endexceptlabel,filterlabel);
+
+errorexit:
+    { restore all saved labels }
+    endexceptlabel:=oldendexceptlabel;
+
+    { restore the control flow labels }
+    current_procinfo.CurrExitLabel:=oldCurrExitLabel;
+    if assigned(oldBreakLabel) then
+      begin
+        current_procinfo.CurrContinueLabel:=oldContinueLabel;
+        current_procinfo.CurrBreakLabel:=oldBreakLabel;
+      end;
+
+    { return all used control flow statements }
+    flowcontrol:=oldflowcontrol+(exceptflowcontrol +
+      tryflowcontrol - [fc_inflowcontrol]);
+  end;
+
+initialization { install the AArch64 flow-control node classes in the corresponding class variables }
+  craisenode:=taarch64raisenode;
+  connode:=taarch64onnode;
+  ctryexceptnode:=taarch64tryexceptnode;
+  ctryfinallynode:=taarch64tryfinallynode;
+end.
+

+ 94 - 0
compiler/aarch64/ncpuinl.pas

@@ -35,6 +35,7 @@ interface
         function first_sqrt_real: tnode; override;
         function first_round_real: tnode; override;
         function first_trunc_real: tnode; override;
+        function first_fma : tnode; override;
         procedure second_abs_real; override;
         procedure second_sqr_real; override;
         procedure second_sqrt_real; override;
@@ -42,6 +43,7 @@ interface
         procedure second_round_real; override;
         procedure second_trunc_real; override;
         procedure second_get_frame; override;
+        procedure second_fma; override;
       private
         procedure load_fpu_location;
       end;
@@ -53,6 +55,7 @@ implementation
       globtype,verbose,globals,
       cpuinfo, defutil,symdef,aasmdata,aasmcpu,
       cgbase,cgutils,pass_1,pass_2,
+      ncal,
       cpubase,ncgutil,cgobj,cgcpu, hlcgobj;
 
 {*****************************************************************************
@@ -104,10 +107,22 @@ implementation
       end;
 
 
+     function taarch64inlinenode.first_fma : tnode;
+       begin
+         { only single and double results are handled here; every other
+           result type is passed on to the inherited implementation }
+         if not(is_double(resultdef) or is_single(resultdef)) then
+           begin
+             Result:=inherited first_fma;
+             exit;
+           end;
+         { expect the result in an MM register and return nil }
+         Result:=nil;
+         expectloc:=LOC_MMREGISTER;
+       end;
+
     procedure taarch64inlinenode.second_abs_real;
       begin
+        { emits FABS with location.register as destination and
+          left.location.register as source, followed by the optional FPU
+          exception check.
+          NOTE(review): load_fpu_location presumably prepares both registers -- confirm }
         load_fpu_location;
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FABS,location.register,left.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
 
 
@@ -115,6 +130,7 @@ implementation
       begin
         load_fpu_location;
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_FMUL,location.register,left.location.register,left.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
 
 
@@ -122,6 +138,7 @@ implementation
       begin
         load_fpu_location;
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FSQRT,location.register,left.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
 
 
@@ -154,6 +171,7 @@ implementation
         { convert to signed integer rounding towards zero (there's no "round to
           integer using current rounding mode") }
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCVTZS,location.register,hreg));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
 
 
@@ -178,6 +196,82 @@ implementation
         location.register:=NR_FRAME_POINTER_REG;
       end;
 
+
+    procedure taarch64inlinenode.second_fma;
+      { Generates the code for the fma intrinsic as a single four-register
+        instruction. Unary minus nodes directly wrapping any of the three
+        parameters are not evaluated; instead their effect is folded into the
+        choice between FMADD, FNMSUB, FMSUB and FNMADD. The result is placed
+        in a freshly allocated MM register. }
+      const
+        { opcode table indexed as op[negproduct,negop3] }
+        op : array[false..true,false..true] of TAsmOp =
+          { positive product }
+          (
+           { positive third operand }
+           (A_FMADD,
+           { negative third operand }
+            A_FNMSUB),
+           { negative product }
+            { positive third operand }
+            (A_FMSUB,
+            { negative third operand }
+             A_FNMADD)
+           );
+
+      var
+        paraarray : array[1..3] of tnode;
+        i : integer;
+        negop3,
+        negproduct : boolean;
+      begin
+        negop3:=false;
+        negproduct:=false;
+        { walk the parameter chain starting at "parameters": the head of the
+          chain becomes paraarray[3], the node reached after two nextpara
+          steps becomes paraarray[1] }
+        paraarray[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;
+        paraarray[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
+        paraarray[3]:=tcallparanode(parameters).paravalue;
+
+        { check if a neg. node can be removed;
+          this is possible because changing the sign of
+          a floating point number does not affect its absolute
+          value in any way
+        }
+        if paraarray[1].nodetype=unaryminusn then
+          begin
+            paraarray[1]:=tunarynode(paraarray[1]).left;
+            { do not release the unused unary minus node, it is kept and released together with the other nodes,
+              only no code is generated for it }
+            negproduct:=not(negproduct);
+          end;
+
+        { a second negated factor toggles negproduct back: the two signs cancel }
+        if paraarray[2].nodetype=unaryminusn then
+          begin
+            paraarray[2]:=tunarynode(paraarray[2]).left;
+            { do not release the unused unary minus node, it is kept and released together with the other nodes,
+              only no code is generated for it }
+            negproduct:=not(negproduct);
+          end;
+
+        if paraarray[3].nodetype=unaryminusn then
+          begin
+            paraarray[3]:=tunarynode(paraarray[3]).left;
+            { do not release the unused unary minus node, it is kept and released together with the other nodes,
+              only no code is generated for it }
+            negop3:=true;
+          end;
+
+         { generate code for the (possibly unwrapped) operands }
+         for i:=1 to 3 do
+          secondpass(paraarray[i]);
+
+        { no memory operand is allowed }
+        for i:=1 to 3 do
+          begin
+            if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
+          end;
+
+        { the result takes its size from the first operand }
+        location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
+        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+
+        { operand order: destination, paraarray[1], paraarray[2], paraarray[3] }
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_reg(op[negproduct,negop3],
+          location.register,paraarray[1].location.register,paraarray[2].location.register,paraarray[3].location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+      end;
+
+
 begin
   cinlinenode:=taarch64inlinenode;
 end.

+ 75 - 9
compiler/aarch64/ncpumat.pas

@@ -76,9 +76,58 @@ implementation
          resultreg  : tregister;
          hl : tasmlabel;
          overflowloc: tlocation;
+         power: longint;
+
+       procedure genOrdConstNodeDiv;
+         { Emits the quotient of "numerator" by the ordinal constant in "right"
+           into "resultreg" without a divide instruction where possible.
+           Uses numerator, resultreg and power from the enclosing routine.
+           Cases handled: 0 (internal error), 1 (plain copy), -1 (negate),
+           powers of two (shifts); anything else goes to the generic
+           cg.g_div_const_reg_reg. }
+         var
+           helper1, helper2: TRegister;
+           so: tshifterop;
+         begin
+           if tordconstnode(right).value=0 then
+             internalerror(2020021601)
+           else if tordconstnode(right).value=1 then
+             cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, numerator, resultreg)
+           else if (tordconstnode(right).value = int64(-1)) then
+             begin
+               // note: only in the signed case possible..., may overflow
+               if cs_check_overflow in current_settings.localswitches then
+                 cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+
+               { the postfix is PF_S when overflow checking is enabled and
+                 toppostfix(0) otherwise (presumably "no postfix" -- confirm) }
+               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_NEG,
+                 resultreg,numerator),toppostfix(ord(cs_check_overflow in current_settings.localswitches)*ord(PF_S))));
+             end
+           else if ispowerof2(tordconstnode(right).value,power) then
+             begin
+               if (is_signed(right.resultdef)) then
+                 begin
+                    { signed: an arithmetic shift alone would round negative
+                      values towards minus infinity, so 2^power-1 is added to
+                      negative numerators first. The bias is obtained by
+                      shifting the sign mask (numerator sar 63) right logically
+                      by 64-power; for power=1 the numerator itself can be used
+                      because only its top bit survives the shift by 63. }
+                    helper2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
+                    if power = 1 then
+                      helper1:=numerator
+                    else
+                      begin
+                        helper1:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
+                        cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,63,numerator,helper1);
+                      end;
+                    shifterop_reset(so);
+                    so.shiftmode:=SM_LSR;
+                    so.shiftimm:=64-power;
+                    { helper2 = numerator + (helper1 lsr (64-power)) }
+                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,helper2,numerator,helper1,so));
+                    cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,power,helper2,resultreg);
+                  end
+               else
+                 { unsigned: a logical shift right is the whole division }
+                 cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_INT,power,numerator,resultreg)
+             end
+           else
+             { Everything else is handled in the generic code }
+             cg.g_div_const_reg_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),
+               tordconstnode(right).value.svalue,numerator,resultreg);
+         end;
+
       begin
        secondpass(left);
        secondpass(right);
+       { avoid warning }
+       divider:=NR_NO;
 
        { set result location }
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
@@ -89,16 +138,32 @@ implementation
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
        numerator:=left.location.register;
 
-       { load divider in a register }
-       hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
-       divider:=right.location.register;
-
-       { start division }
-       if is_signed(left.resultdef) then
-         op:=A_SDIV
+       if (right.nodetype=ordconstn) and
+          ((tordconstnode(right).value=1) or
+           (tordconstnode(right).value=int64(-1)) or
+           (tordconstnode(right).value=0) or
+           ispowerof2(tordconstnode(right).value,power)) then
+         begin
+           genOrdConstNodeDiv;
+           if nodetype=modn then
+             begin
+               divider:=cg.getintregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
+               cg.a_load_const_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),int64(tordconstnode(right).value),divider);
+             end;
+         end
        else
-         op:=A_UDIV;
-       current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,location.register,numerator,divider));
+         begin
+           { load divider in a register }
+           hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
+           divider:=right.location.register;
+
+           { start division }
+           if is_signed(left.resultdef) then
+             op:=A_SDIV
+           else
+             op:=A_UDIV;
+           current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,location.register,numerator,divider));
+         end;
 
        { no divide-by-zero detection available in hardware, emulate (if it's a
          constant, this will have been detected earlier already) }
@@ -187,6 +252,7 @@ implementation
         location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
         location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FNEG,location.register,left.location.register));
+        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
       end;
 
 begin

+ 144 - 16
compiler/aarch64/ncpuset.pas

@@ -34,6 +34,7 @@ interface
            procedure optimizevalues(var max_linear_list: int64; var max_dist: qword);override;
            function  has_jumptable: boolean;override;
            procedure genjumptable(hp: pcaselabel ;min_, max_: int64);override;
+           procedure genlinearlist(hp: pcaselabel);override;
        end;
 
 
@@ -41,7 +42,7 @@ implementation
 
     uses
       systems,
-      verbose,globals,constexp,
+      verbose,globals,constexp,cutils,
       symconst,symdef,defutil,
       paramgr,
       cpuinfo,
@@ -68,6 +69,120 @@ implementation
       end;
 
 
+    procedure taarch64casenode.genlinearlist(hp : pcaselabel);
+      { Emits a linear sequence of tests for the case labels in the tree
+        rooted at hp. Instead of comparing against each label, the distance
+        to the next label is subtracted from hregister and the resulting
+        flags are used to branch to the matching block or to elselabel.
+        When the selector is signed and the smallest label is negative,
+        genlinearcmplist is used instead. }
+      var
+        first : boolean;
+        lastrange : boolean;
+        last : TConstExprInt;
+        cond_lt,cond_le : tresflags;
+        opcgsize, unsigned_opcgsize: tcgsize;
+
+        { handles one label node: first its "less" subtree, then the node
+          itself, then its "greater" subtree }
+        procedure genitem(t : pcaselabel);
+          var
+           ovloc: tlocation;
+          begin
+            if assigned(t^.less) then
+              genitem(t^.less);
+            { do we need to test the first value? }
+            if first and (t^._low>get_min_value(left.resultdef)) then
+              begin
+                cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opcgsize,jmp_lt,aint(t^._low.svalue),hregister,elselabel);
+              end;
+            if t^._low=t^._high then
+              begin
+                 { single value label }
+                 if t^._low-last=0 then
+                   cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList, opcgsize, OC_EQ,0,hregister,blocklabel(t^.blockid))
+                 else
+                   begin
+                     { use unsigned_opcgsize here to avoid unnecessary sign extensions, at this place hregister will never be negative, because
+                       then genlinearlist wouldn't be used }
+                     cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList, OP_SUB, unsigned_opcgsize, aint(t^._low.svalue-last.svalue), hregister, hregister,
+                       true,ovloc);
+                     cg.a_jmp_flags(current_asmdata.CurrAsmList,F_EQ,blocklabel(t^.blockid));
+                   end;
+                 last:=t^._low;
+                 lastrange:=false;
+              end
+            else
+              begin
+                 { it begins with the smallest label, if the value }
+                 { is even smaller then jump immediately to the    }
+                 { ELSE-label                                }
+                 if first then
+                   begin
+                      { do we have to adjust the first value? }
+                      if (t^._low>get_min_value(left.resultdef)) or (get_min_value(left.resultdef)<>0) then
+                        begin
+                          { use unsigned_opcgsize here to avoid unnecessary sign extensions, at this place hregister will never be negative, because
+                            then genlinearlist wouldn't be used }
+                          cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList, OP_SUB, unsigned_opcgsize, aint(t^._low.svalue), hregister, hregister,
+                            true,ovloc);
+                        end;
+                   end
+                 else
+                   begin
+                     { if there is no unused label between the last and the }
+                     { present label then the lower limit can be checked    }
+                     { immediately. else check the range in between:       }
+
+                     { use unsigned_opcgsize here to avoid unnecessary sign extensions, at this place hregister will never be negative, because
+                       then genlinearlist wouldn't be used }
+                     cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList, OP_SUB, unsigned_opcgsize, aint(t^._low.svalue - last.svalue), hregister, hregister,
+                       true,ovloc);
+                     { no jump necessary here if the new range starts }
+                     { at the value following the previous one        }
+                     if (aint(t^._low.svalue - last.svalue) <> 1) or
+                        (not lastrange) then
+                       cg.a_jmp_flags(current_asmdata.CurrAsmList,cond_lt,elselabel);
+                   end;
+                 { use unsigned_opcgsize here to avoid unnecessary sign extensions, at this place hregister will never be negative, because
+                   then genlinearlist wouldn't be used }
+                 cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,unsigned_opcgsize,aint(t^._high.svalue - t^._low.svalue), hregister, hregister,
+                   true,ovloc);
+                 cg.a_jmp_flags(current_asmdata.CurrAsmList,cond_le,blocklabel(t^.blockid));
+
+                 last:=t^._high;
+                 lastrange:=true;
+              end;
+            first:=false;
+            if assigned(t^.greater) then
+              genitem(t^.greater);
+          end;
+
+        begin
+           opcgsize:=def_cgsize(opsize);
+           { the subtractions are done with OS_32 for selectors of up to
+             32 bits and with OS_64 for 64 bit selectors }
+           case opcgsize of
+             OS_8,OS_16,OS_32,OS_S8,OS_S16,OS_S32:
+               unsigned_opcgsize:=OS_32;
+             OS_64,OS_S64:
+               unsigned_opcgsize:=OS_64;
+             else
+               Internalerror(2019090902);
+           end;
+           { pick the flag conditions for "below lower bound" and "within range"
+             according to the signedness of the selector }
+           if with_sign then
+             begin
+                cond_lt:=F_LT;
+                cond_le:=F_LE;
+             end
+           else
+              begin
+                cond_lt:=F_CC;
+                cond_le:=F_LS;
+             end;
+           { do we need to generate cmps? }
+           if (with_sign and (min_label<0)) then
+             genlinearcmplist(hp)
+           else
+             begin
+                last:=0;
+                lastrange:=false;
+                first:=true;
+                genitem(hp);
+                { no label matched }
+                cg.a_jmp_always(current_asmdata.CurrAsmList,elselabel);
+             end;
+        end;
+
+
     procedure taarch64casenode.genjumptable(hp: pcaselabel; min_, max_: int64);
       var
         last: TConstExprInt;
@@ -139,24 +254,37 @@ implementation
         { and finally jump }
         current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_BR,jumpreg));
         { generate jump table }
-        if not(target_info.system in systems_darwin) then
-          sectype:=sec_rodata
-        else
+        if target_info.system=system_aarch64_win64 then
           begin
-            { on Mac OS X, dead code stripping ("smart linking") happens based on
-              global symbols: every global/static symbol (symbols that do not
-              start with "L") marks the start of a new "subsection" that is
-              discarded by the linker if there are no references to this symbol.
-              This means that if you put the jump table in the rodata section, it
-              will become part of the block of data associated with the previous
-              non-L-label in the rodata section and stay or be thrown away
-              depending on whether that block of data is referenced. Therefore,
-              jump tables must be added in the code section and since aktlocaldata
-              is inserted right after the routine, it will become part of the
-              same subsection that contains the routine's code }
+            { for Windows we need to make sure that the jump table is located in the
+              same section as the corresponding code as for one clang generates a
+              ABSOLUTE32 relocation that can not be handled correctly and armasm64
+              rejects the difference entries due to the symbols being located in
+              different sections }
             sectype:=sec_code;
+            new_section(current_procinfo.aktlocaldata,sectype,lower(current_procinfo.procdef.mangledname),getprocalign);
+          end
+        else
+          begin
+            if not(target_info.system in systems_darwin) then
+              sectype:=sec_rodata
+            else
+              begin
+                { on Mac OS X, dead code stripping ("smart linking") happens based on
+                  global symbols: every global/static symbol (symbols that do not
+                  start with "L") marks the start of a new "subsection" that is
+                  discarded by the linker if there are no references to this symbol.
+                  This means that if you put the jump table in the rodata section, it
+                  will become part of the block of data associated with the previous
+                  non-L-label in the rodata section and stay or be thrown away
+                  depending on whether that block of data is referenced. Therefore,
+                  jump tables must be added in the code section and since aktlocaldata
+                  is inserted right after the routine, it will become part of the
+                  same subsection that contains the routine's code }
+                sectype:=sec_code;
+              end;
+            new_section(current_procinfo.aktlocaldata,sectype,current_procinfo.procdef.mangledname,4);
           end;
-        new_section(current_procinfo.aktlocaldata,sectype,current_procinfo.procdef.mangledname,4);
         if target_info.system in systems_darwin then
           begin
             { additionally, these tables are now marked via ".data_region jt32"

+ 64 - 0
compiler/aarch64/ra64con.inc

@@ -71,161 +71,225 @@ NR_H0 = tregister($04030000);
 NR_S0 = tregister($04090000);
 NR_D0 = tregister($040a0000);
 NR_Q0 = tregister($04050000);
+NR_V08B = tregister($04170000);
+NR_V016B = tregister($04180000);
 NR_B1 = tregister($04010001);
 NR_H1 = tregister($04030001);
 NR_S1 = tregister($04090001);
 NR_D1 = tregister($040a0001);
 NR_Q1 = tregister($04050001);
+NR_V18B = tregister($04170001);
+NR_V116B = tregister($04180001);
 NR_B2 = tregister($04010002);
 NR_H2 = tregister($04030002);
 NR_S2 = tregister($04090002);
 NR_D2 = tregister($040a0002);
 NR_Q2 = tregister($04050002);
+NR_V28B = tregister($04170002);
+NR_V216B = tregister($04180002);
 NR_B3 = tregister($04010003);
 NR_H3 = tregister($04030003);
 NR_S3 = tregister($04090003);
 NR_D3 = tregister($040a0003);
 NR_Q3 = tregister($04050003);
+NR_V38B = tregister($04170003);
+NR_V316B = tregister($04180003);
 NR_B4 = tregister($04010004);
 NR_H4 = tregister($04030004);
 NR_S4 = tregister($04090004);
 NR_D4 = tregister($040a0004);
 NR_Q4 = tregister($04050004);
+NR_V48B = tregister($04170004);
+NR_V416B = tregister($04180004);
 NR_B5 = tregister($04010005);
 NR_H5 = tregister($04030005);
 NR_S5 = tregister($04090005);
 NR_D5 = tregister($040a0005);
 NR_Q5 = tregister($04050005);
+NR_V58B = tregister($04170005);
+NR_V516B = tregister($04180005);
 NR_B6 = tregister($04010006);
 NR_H6 = tregister($04030006);
 NR_S6 = tregister($04090006);
 NR_D6 = tregister($040a0006);
 NR_Q6 = tregister($04050006);
+NR_V68B = tregister($04170006);
+NR_V616B = tregister($04180006);
 NR_B7 = tregister($04010007);
 NR_H7 = tregister($04030007);
 NR_S7 = tregister($04090007);
 NR_D7 = tregister($040a0007);
 NR_Q7 = tregister($04050007);
+NR_V78B = tregister($04170007);
+NR_V716B = tregister($04180007);
 NR_B8 = tregister($04010008);
 NR_H8 = tregister($04030008);
 NR_S8 = tregister($04090008);
 NR_D8 = tregister($040a0008);
 NR_Q8 = tregister($04050008);
+NR_V88B = tregister($04170008);
+NR_V816B = tregister($04180008);
 NR_B9 = tregister($04010009);
 NR_H9 = tregister($04030009);
 NR_S9 = tregister($04090009);
 NR_D9 = tregister($040a0009);
 NR_Q9 = tregister($04050009);
+NR_V98B = tregister($04170009);
+NR_V916B = tregister($04180009);
 NR_B10 = tregister($0401000A);
 NR_H10 = tregister($0403000A);
 NR_S10 = tregister($0409000A);
 NR_D10 = tregister($040a000A);
 NR_Q10 = tregister($0405000A);
+NR_V108B = tregister($0417000A);
+NR_V1016B = tregister($0418000A);
 NR_B11 = tregister($0401000B);
 NR_H11 = tregister($0403000B);
 NR_S11 = tregister($0409000B);
 NR_D11 = tregister($040a000B);
 NR_Q11 = tregister($0405000B);
+NR_V118B = tregister($0417000B);
+NR_V1116B = tregister($0418000B);
 NR_B12 = tregister($0401000C);
 NR_H12 = tregister($0403000C);
 NR_S12 = tregister($0409000C);
 NR_D12 = tregister($040a000C);
 NR_Q12 = tregister($0405000C);
+NR_V128B = tregister($0417000C);
+NR_V1216B = tregister($0418000C);
 NR_B13 = tregister($0401000D);
 NR_H13 = tregister($0403000D);
 NR_S13 = tregister($0409000D);
 NR_D13 = tregister($040a000D);
 NR_Q13 = tregister($0405000D);
+NR_V138B = tregister($0417000D);
+NR_V1316B = tregister($0418000D);
 NR_B14 = tregister($0401000E);
 NR_H14 = tregister($0403000E);
 NR_S14 = tregister($0409000E);
 NR_D14 = tregister($040a000E);
 NR_Q14 = tregister($0405000E);
+NR_V148B = tregister($0417000E);
+NR_V1416B = tregister($0418000E);
 NR_B15 = tregister($0401000F);
 NR_H15 = tregister($0403000F);
 NR_S15 = tregister($0409000F);
 NR_D15 = tregister($040a000F);
 NR_Q15 = tregister($0405000F);
+NR_V158B = tregister($0417000F);
+NR_V1516B = tregister($0418000F);
 NR_B16 = tregister($04010010);
 NR_H16 = tregister($04030010);
 NR_S16 = tregister($04090010);
 NR_D16 = tregister($040a0010);
 NR_Q16 = tregister($04050010);
+NR_V168B = tregister($04170010);
+NR_V1616B = tregister($04180010);
 NR_B17 = tregister($04010011);
 NR_H17 = tregister($04030011);
 NR_S17 = tregister($04090011);
 NR_D17 = tregister($040a0011);
 NR_Q17 = tregister($04050011);
+NR_V178B = tregister($04170011);
+NR_V1716B = tregister($04180011);
 NR_B18 = tregister($04010012);
 NR_H18 = tregister($04030012);
 NR_S18 = tregister($04090012);
 NR_D18 = tregister($040a0012);
 NR_Q18 = tregister($04050012);
+NR_V188B = tregister($04170012);
+NR_V1816B = tregister($04180012);
 NR_B19 = tregister($04010013);
 NR_H19 = tregister($04030013);
 NR_S19 = tregister($04090013);
 NR_D19 = tregister($040a0013);
 NR_Q19 = tregister($04050013);
+NR_V198B = tregister($04170013);
+NR_V1916B = tregister($04180013);
 NR_B20 = tregister($04010014);
 NR_H20 = tregister($04030014);
 NR_S20 = tregister($04090014);
 NR_D20 = tregister($040a0014);
 NR_Q20 = tregister($04050014);
+NR_V208B = tregister($04170014);
+NR_V2016B = tregister($04180014);
 NR_B21 = tregister($04010015);
 NR_H21 = tregister($04030015);
 NR_S21 = tregister($04090015);
 NR_D21 = tregister($040a0015);
 NR_Q21 = tregister($04050015);
+NR_V218B = tregister($04170015);
+NR_V2116B = tregister($04180015);
 NR_B22 = tregister($04010016);
 NR_H22 = tregister($04030016);
 NR_S22 = tregister($04090016);
 NR_D22 = tregister($040a0016);
 NR_Q22 = tregister($04050016);
+NR_V228B = tregister($04170016);
+NR_V2216B = tregister($04180016);
 NR_B23 = tregister($04010017);
 NR_H23 = tregister($04030017);
 NR_S23 = tregister($04090017);
 NR_D23 = tregister($040a0017);
 NR_Q23 = tregister($04050017);
+NR_V238B = tregister($04170017);
+NR_V2316B = tregister($04180017);
 NR_B24 = tregister($04010018);
 NR_H24 = tregister($04030018);
 NR_S24 = tregister($04090018);
 NR_D24 = tregister($040a0018);
 NR_Q24 = tregister($04050018);
+NR_V248B = tregister($04170018);
+NR_V2416B = tregister($04180018);
 NR_B25 = tregister($04010019);
 NR_H25 = tregister($04030019);
 NR_S25 = tregister($04090019);
 NR_D25 = tregister($040a0019);
 NR_Q25 = tregister($04050019);
+NR_V258B = tregister($04170019);
+NR_V2516B = tregister($04180019);
 NR_B26 = tregister($0401001A);
 NR_H26 = tregister($0403001A);
 NR_S26 = tregister($0409001A);
 NR_D26 = tregister($040a001A);
 NR_Q26 = tregister($0405001A);
+NR_V268B = tregister($0417001A);
+NR_V2616B = tregister($0418001A);
 NR_B27 = tregister($0401001B);
 NR_H27 = tregister($0403001B);
 NR_S27 = tregister($0409001B);
 NR_D27 = tregister($040a001B);
 NR_Q27 = tregister($0405001B);
+NR_V278B = tregister($0417001B);
+NR_V2716B = tregister($0418001B);
 NR_B28 = tregister($0401001C);
 NR_H28 = tregister($0403001C);
 NR_S28 = tregister($0409001C);
 NR_D28 = tregister($040a001C);
 NR_Q28 = tregister($0405001C);
+NR_V288B = tregister($0417001C);
+NR_V2816B = tregister($0418001C);
 NR_B29 = tregister($0401001D);
 NR_H29 = tregister($0403001D);
 NR_S29 = tregister($0409001D);
 NR_D29 = tregister($040a001D);
 NR_Q29 = tregister($0405001D);
+NR_V298B = tregister($0417001D);
+NR_V2916B = tregister($0418001D);
 NR_B30 = tregister($0401001E);
 NR_H30 = tregister($0403001E);
 NR_S30 = tregister($0409001E);
 NR_D30 = tregister($040a001E);
 NR_Q30 = tregister($0405001E);
+NR_V308B = tregister($0417001E);
+NR_V3016B = tregister($0418001E);
 NR_B31 = tregister($0401001F);
 NR_H31 = tregister($0403001F);
 NR_S31 = tregister($0409001F);
 NR_D31 = tregister($040a001F);
 NR_Q31 = tregister($0405001F);
+NR_V318B = tregister($0417001F);
+NR_V3116B = tregister($0418001F);
 NR_NZCV = tregister($05000000);
 NR_FPCR = tregister($05000001);
 NR_FPSR = tregister($05000002);

+ 64 - 0
compiler/aarch64/ra64dwa.inc

@@ -71,6 +71,10 @@
 64,
 64,
 64,
+64,
+64,
+65,
+65,
 65,
 65,
 65,
@@ -81,6 +85,10 @@
 66,
 66,
 66,
+66,
+66,
+67,
+67,
 67,
 67,
 67,
@@ -91,6 +99,10 @@
 68,
 68,
 68,
+68,
+68,
+69,
+69,
 69,
 69,
 69,
@@ -98,129 +110,181 @@
 69,
 70,
 70,
+70,
 70,
 70,
 70,
+70,
+71,
 71,
 71,
 71,
 71,
 71,
+71,
+72,
 72,
 72,
 72,
 72,
 72,
+72,
+73,
 73,
 73,
 73,
 73,
 73,
+73,
+74,
 74,
 74,
 74,
 74,
 74,
+74,
+75,
 75,
 75,
 75,
 75,
 75,
+75,
+76,
 76,
 76,
 76,
 76,
 76,
+76,
+77,
 77,
 77,
 77,
 77,
 77,
+77,
+78,
 78,
 78,
 78,
 78,
 78,
+78,
+79,
 79,
 79,
 79,
 79,
 79,
+79,
+80,
 80,
 80,
 80,
 80,
 80,
+80,
+81,
 81,
 81,
 81,
 81,
 81,
+81,
+82,
 82,
 82,
 82,
 82,
 82,
+82,
+83,
 83,
 83,
 83,
 83,
 83,
+83,
+84,
 84,
 84,
 84,
 84,
 84,
+84,
+85,
 85,
 85,
 85,
 85,
 85,
+85,
+86,
 86,
 86,
 86,
 86,
 86,
+86,
+87,
 87,
 87,
 87,
 87,
 87,
+87,
+88,
 88,
 88,
 88,
 88,
 88,
+88,
+89,
 89,
 89,
 89,
 89,
 89,
+89,
+90,
 90,
 90,
 90,
 90,
 90,
+90,
+91,
 91,
 91,
 91,
 91,
 91,
+91,
+92,
 92,
 92,
 92,
 92,
 92,
+92,
+93,
 93,
 93,
 93,
 93,
 93,
+93,
+94,
 94,
 94,
 94,
 94,
 94,
+94,
+95,
+95,
 95,
 95,
 95,

+ 1 - 1
compiler/aarch64/ra64nor.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from a64reg.dat }
-231
+295

+ 64 - 0
compiler/aarch64/ra64num.inc

@@ -71,161 +71,225 @@ tregister($04030000),
 tregister($04090000),
 tregister($040a0000),
 tregister($04050000),
+tregister($04170000),
+tregister($04180000),
 tregister($04010001),
 tregister($04030001),
 tregister($04090001),
 tregister($040a0001),
 tregister($04050001),
+tregister($04170001),
+tregister($04180001),
 tregister($04010002),
 tregister($04030002),
 tregister($04090002),
 tregister($040a0002),
 tregister($04050002),
+tregister($04170002),
+tregister($04180002),
 tregister($04010003),
 tregister($04030003),
 tregister($04090003),
 tregister($040a0003),
 tregister($04050003),
+tregister($04170003),
+tregister($04180003),
 tregister($04010004),
 tregister($04030004),
 tregister($04090004),
 tregister($040a0004),
 tregister($04050004),
+tregister($04170004),
+tregister($04180004),
 tregister($04010005),
 tregister($04030005),
 tregister($04090005),
 tregister($040a0005),
 tregister($04050005),
+tregister($04170005),
+tregister($04180005),
 tregister($04010006),
 tregister($04030006),
 tregister($04090006),
 tregister($040a0006),
 tregister($04050006),
+tregister($04170006),
+tregister($04180006),
 tregister($04010007),
 tregister($04030007),
 tregister($04090007),
 tregister($040a0007),
 tregister($04050007),
+tregister($04170007),
+tregister($04180007),
 tregister($04010008),
 tregister($04030008),
 tregister($04090008),
 tregister($040a0008),
 tregister($04050008),
+tregister($04170008),
+tregister($04180008),
 tregister($04010009),
 tregister($04030009),
 tregister($04090009),
 tregister($040a0009),
 tregister($04050009),
+tregister($04170009),
+tregister($04180009),
 tregister($0401000A),
 tregister($0403000A),
 tregister($0409000A),
 tregister($040a000A),
 tregister($0405000A),
+tregister($0417000A),
+tregister($0418000A),
 tregister($0401000B),
 tregister($0403000B),
 tregister($0409000B),
 tregister($040a000B),
 tregister($0405000B),
+tregister($0417000B),
+tregister($0418000B),
 tregister($0401000C),
 tregister($0403000C),
 tregister($0409000C),
 tregister($040a000C),
 tregister($0405000C),
+tregister($0417000C),
+tregister($0418000C),
 tregister($0401000D),
 tregister($0403000D),
 tregister($0409000D),
 tregister($040a000D),
 tregister($0405000D),
+tregister($0417000D),
+tregister($0418000D),
 tregister($0401000E),
 tregister($0403000E),
 tregister($0409000E),
 tregister($040a000E),
 tregister($0405000E),
+tregister($0417000E),
+tregister($0418000E),
 tregister($0401000F),
 tregister($0403000F),
 tregister($0409000F),
 tregister($040a000F),
 tregister($0405000F),
+tregister($0417000F),
+tregister($0418000F),
 tregister($04010010),
 tregister($04030010),
 tregister($04090010),
 tregister($040a0010),
 tregister($04050010),
+tregister($04170010),
+tregister($04180010),
 tregister($04010011),
 tregister($04030011),
 tregister($04090011),
 tregister($040a0011),
 tregister($04050011),
+tregister($04170011),
+tregister($04180011),
 tregister($04010012),
 tregister($04030012),
 tregister($04090012),
 tregister($040a0012),
 tregister($04050012),
+tregister($04170012),
+tregister($04180012),
 tregister($04010013),
 tregister($04030013),
 tregister($04090013),
 tregister($040a0013),
 tregister($04050013),
+tregister($04170013),
+tregister($04180013),
 tregister($04010014),
 tregister($04030014),
 tregister($04090014),
 tregister($040a0014),
 tregister($04050014),
+tregister($04170014),
+tregister($04180014),
 tregister($04010015),
 tregister($04030015),
 tregister($04090015),
 tregister($040a0015),
 tregister($04050015),
+tregister($04170015),
+tregister($04180015),
 tregister($04010016),
 tregister($04030016),
 tregister($04090016),
 tregister($040a0016),
 tregister($04050016),
+tregister($04170016),
+tregister($04180016),
 tregister($04010017),
 tregister($04030017),
 tregister($04090017),
 tregister($040a0017),
 tregister($04050017),
+tregister($04170017),
+tregister($04180017),
 tregister($04010018),
 tregister($04030018),
 tregister($04090018),
 tregister($040a0018),
 tregister($04050018),
+tregister($04170018),
+tregister($04180018),
 tregister($04010019),
 tregister($04030019),
 tregister($04090019),
 tregister($040a0019),
 tregister($04050019),
+tregister($04170019),
+tregister($04180019),
 tregister($0401001A),
 tregister($0403001A),
 tregister($0409001A),
 tregister($040a001A),
 tregister($0405001A),
+tregister($0417001A),
+tregister($0418001A),
 tregister($0401001B),
 tregister($0403001B),
 tregister($0409001B),
 tregister($040a001B),
 tregister($0405001B),
+tregister($0417001B),
+tregister($0418001B),
 tregister($0401001C),
 tregister($0403001C),
 tregister($0409001C),
 tregister($040a001C),
 tregister($0405001C),
+tregister($0417001C),
+tregister($0418001C),
 tregister($0401001D),
 tregister($0403001D),
 tregister($0409001D),
 tregister($040a001D),
 tregister($0405001D),
+tregister($0417001D),
+tregister($0418001D),
 tregister($0401001E),
 tregister($0403001E),
 tregister($0409001E),
 tregister($040a001E),
 tregister($0405001E),
+tregister($0417001E),
+tregister($0418001E),
 tregister($0401001F),
 tregister($0403001F),
 tregister($0409001F),
 tregister($040a001F),
 tregister($0405001F),
+tregister($0417001F),
+tregister($0418001F),
 tregister($05000000),
 tregister($05000001),
 tregister($05000002),

+ 201 - 137
compiler/aarch64/ra64rni.inc

@@ -67,166 +67,230 @@
 64,
 66,
 67,
-72,
-77,
-82,
-87,
-92,
-97,
+74,
+81,
+88,
+95,
 102,
-107,
-112,
-117,
-122,
-127,
-132,
+109,
+116,
+123,
+130,
 137,
-142,
-147,
-152,
-157,
-162,
-167,
+144,
+151,
+158,
+165,
 172,
-177,
-182,
-187,
-192,
-197,
-202,
+179,
+186,
+193,
+200,
 207,
-212,
-217,
-222,
+214,
+221,
+228,
+235,
+242,
+249,
+256,
+263,
+270,
+277,
+284,
 68,
-73,
-78,
-83,
-88,
-93,
-98,
+75,
+82,
+89,
+96,
 103,
-108,
-113,
-118,
-123,
-128,
-133,
+110,
+117,
+124,
+131,
 138,
-143,
-148,
-153,
-158,
-163,
-168,
+145,
+152,
+159,
+166,
 173,
-178,
-183,
-188,
-193,
-198,
-203,
+180,
+187,
+194,
+201,
 208,
-213,
-218,
-223,
+215,
+222,
+229,
+236,
+243,
+250,
+257,
+264,
+271,
+278,
+285,
 71,
-76,
-81,
-86,
-91,
-96,
-101,
+78,
+85,
+92,
+99,
 106,
-111,
-116,
-121,
-126,
-131,
-136,
+113,
+120,
+127,
+134,
 141,
-146,
-151,
-156,
-161,
-166,
-171,
+148,
+155,
+162,
+169,
 176,
-181,
-186,
-191,
-196,
-201,
-206,
+183,
+190,
+197,
+204,
 211,
-216,
-221,
-226,
+218,
+225,
+232,
+239,
+246,
+253,
+260,
+267,
+274,
+281,
+288,
 69,
-74,
-79,
-84,
-89,
-94,
-99,
+76,
+83,
+90,
+97,
 104,
-109,
-114,
-119,
-124,
-129,
-134,
+111,
+118,
+125,
+132,
 139,
-144,
-149,
-154,
-159,
-164,
-169,
+146,
+153,
+160,
+167,
 174,
-179,
-184,
-189,
-194,
-199,
-204,
+181,
+188,
+195,
+202,
 209,
-214,
-219,
-224,
+216,
+223,
+230,
+237,
+244,
+251,
+258,
+265,
+272,
+279,
+286,
 70,
-75,
-80,
-85,
-90,
-95,
-100,
+77,
+84,
+91,
+98,
 105,
-110,
-115,
-120,
-125,
-130,
-135,
+112,
+119,
+126,
+133,
 140,
-145,
-150,
-155,
-160,
-165,
-170,
+147,
+154,
+161,
+168,
 175,
-180,
-185,
-190,
-195,
-200,
-205,
+182,
+189,
+196,
+203,
 210,
-215,
+217,
+224,
+231,
+238,
+245,
+252,
+259,
+266,
+273,
+280,
+287,
+72,
+79,
+86,
+93,
+100,
+107,
+114,
+121,
+128,
+135,
+142,
+149,
+156,
+163,
+170,
+177,
+184,
+191,
+198,
+205,
+212,
+219,
+226,
+233,
+240,
+247,
+254,
+261,
+268,
+275,
+282,
+289,
+73,
+80,
+87,
+94,
+101,
+108,
+115,
+122,
+129,
+136,
+143,
+150,
+157,
+164,
+171,
+178,
+185,
+192,
+199,
+206,
+213,
 220,
-225,
 227,
-228,
-229,
-230
+234,
+241,
+248,
+255,
+262,
+269,
+276,
+283,
+290,
+291,
+292,
+293,
+294

+ 200 - 136
compiler/aarch64/ra64sri.inc

@@ -1,170 +1,234 @@
 { don't edit, this file is generated from a64reg.dat }
 0,
 67,
-72,
-117,
-122,
-127,
-132,
+74,
 137,
-142,
-147,
-152,
-157,
-162,
-77,
-167,
+144,
+151,
+158,
+165,
 172,
-177,
-182,
-187,
-192,
-197,
-202,
+179,
+186,
+193,
+200,
+81,
 207,
-212,
-82,
-217,
-222,
-87,
-92,
-97,
+214,
+221,
+228,
+235,
+242,
+249,
+256,
+263,
+270,
+88,
+277,
+284,
+95,
 102,
-107,
-112,
-70,
-75,
-120,
-125,
+109,
+116,
+123,
 130,
-135,
+70,
+77,
 140,
-145,
-150,
-155,
-160,
-165,
-80,
-170,
+147,
+154,
+161,
+168,
 175,
-180,
-185,
-190,
-195,
-200,
-205,
+182,
+189,
+196,
+203,
+84,
 210,
-215,
-85,
-220,
-225,
-90,
-95,
-100,
+217,
+224,
+231,
+238,
+245,
+252,
+259,
+266,
+273,
+91,
+280,
+287,
+98,
 105,
-110,
-115,
-228,
-229,
-68,
-73,
-118,
-123,
-128,
+112,
+119,
+126,
 133,
+292,
+293,
+68,
+75,
 138,
-143,
-148,
-153,
-158,
-163,
-78,
-168,
+145,
+152,
+159,
+166,
 173,
-178,
-183,
-188,
-193,
-198,
-203,
+180,
+187,
+194,
+201,
+82,
 208,
-213,
-83,
-218,
-223,
-88,
-93,
-98,
+215,
+222,
+229,
+236,
+243,
+250,
+257,
+264,
+271,
+89,
+278,
+285,
+96,
 103,
-108,
-113,
-227,
-71,
-76,
-121,
-126,
+110,
+117,
+124,
 131,
-136,
+291,
+71,
+78,
 141,
-146,
-151,
-156,
-161,
-166,
-81,
-171,
+148,
+155,
+162,
+169,
 176,
-181,
-186,
-191,
-196,
-201,
-206,
+183,
+190,
+197,
+204,
+85,
 211,
-216,
-86,
-221,
-226,
-91,
-96,
-101,
+218,
+225,
+232,
+239,
+246,
+253,
+260,
+267,
+274,
+92,
+281,
+288,
+99,
 106,
-111,
-116,
-69,
-74,
-119,
-124,
-129,
+113,
+120,
+127,
 134,
+69,
+76,
 139,
-144,
+146,
+153,
+160,
+167,
+174,
+181,
+188,
+195,
+202,
+83,
+209,
+216,
+223,
+230,
+237,
+244,
+251,
+258,
+265,
+272,
+90,
+279,
+286,
+97,
+104,
+111,
+118,
+125,
+132,
+66,
+294,
+73,
+72,
+80,
+79,
+143,
+142,
+150,
 149,
-154,
-159,
+157,
+156,
 164,
-79,
-169,
-174,
-179,
+163,
+171,
+170,
+178,
+177,
+185,
 184,
-189,
-194,
+192,
+191,
 199,
-204,
-209,
-214,
-84,
+198,
+206,
+205,
+87,
+86,
+213,
+212,
+220,
 219,
-224,
-89,
+227,
+226,
+234,
+233,
+241,
+240,
+248,
+247,
+255,
+254,
+262,
+261,
+269,
+268,
+276,
+275,
 94,
-99,
-104,
-109,
+93,
+283,
+282,
+290,
+289,
+101,
+100,
+108,
+107,
+115,
 114,
-66,
-230,
+122,
+121,
+129,
+128,
+136,
+135,
 1,
 3,
 21,

+ 64 - 0
compiler/aarch64/ra64sta.inc

@@ -71,6 +71,10 @@
 64,
 64,
 64,
+64,
+64,
+65,
+65,
 65,
 65,
 65,
@@ -81,6 +85,10 @@
 66,
 66,
 66,
+66,
+66,
+67,
+67,
 67,
 67,
 67,
@@ -91,6 +99,10 @@
 68,
 68,
 68,
+68,
+68,
+69,
+69,
 69,
 69,
 69,
@@ -101,6 +113,10 @@
 70,
 70,
 70,
+70,
+70,
+71,
+71,
 71,
 71,
 71,
@@ -111,6 +127,10 @@
 72,
 72,
 72,
+72,
+72,
+73,
+73,
 73,
 73,
 73,
@@ -121,6 +141,10 @@
 74,
 74,
 74,
+74,
+74,
+75,
+75,
 75,
 75,
 75,
@@ -131,6 +155,10 @@
 76,
 76,
 76,
+76,
+76,
+77,
+77,
 77,
 77,
 77,
@@ -141,6 +169,10 @@
 78,
 78,
 78,
+78,
+78,
+79,
+79,
 79,
 79,
 79,
@@ -151,6 +183,10 @@
 80,
 80,
 80,
+80,
+80,
+81,
+81,
 81,
 81,
 81,
@@ -161,6 +197,10 @@
 82,
 82,
 82,
+82,
+82,
+83,
+83,
 83,
 83,
 83,
@@ -171,6 +211,10 @@
 84,
 84,
 84,
+84,
+84,
+85,
+85,
 85,
 85,
 85,
@@ -181,6 +225,10 @@
 86,
 86,
 86,
+86,
+86,
+87,
+87,
 87,
 87,
 87,
@@ -191,6 +239,10 @@
 88,
 88,
 88,
+88,
+88,
+89,
+89,
 89,
 89,
 89,
@@ -201,6 +253,10 @@
 90,
 90,
 90,
+90,
+90,
+91,
+91,
 91,
 91,
 91,
@@ -211,6 +267,10 @@
 92,
 92,
 92,
+92,
+92,
+93,
+93,
 93,
 93,
 93,
@@ -221,6 +281,10 @@
 94,
 94,
 94,
+94,
+94,
+95,
+95,
 95,
 95,
 95,

+ 64 - 0
compiler/aarch64/ra64std.inc

@@ -71,161 +71,225 @@
 's0',
 'd0',
 'q0',
+'v0.8b',
+'v0.16b',
 'b1',
 'h1',
 's1',
 'd1',
 'q1',
+'v1.8b',
+'v1.16b',
 'b2',
 'h2',
 's2',
 'd2',
 'q2',
+'v2.8b',
+'v2.16b',
 'b3',
 'h3',
 's3',
 'd3',
 'q3',
+'v3.8b',
+'v3.16b',
 'b4',
 'h4',
 's4',
 'd4',
 'q4',
+'v4.8b',
+'v4.16b',
 'b5',
 'h5',
 's5',
 'd5',
 'q5',
+'v5.8b',
+'v5.16b',
 'b6',
 'h6',
 's6',
 'd6',
 'q6',
+'v6.8b',
+'v6.16b',
 'b7',
 'h7',
 's7',
 'd7',
 'q7',
+'v7.8b',
+'v7.16b',
 'b8',
 'h8',
 's8',
 'd8',
 'q8',
+'v8.8b',
+'v8.16b',
 'b9',
 'h9',
 's9',
 'd9',
 'q9',
+'v9.8b',
+'v9.16b',
 'b10',
 'h10',
 's10',
 'd10',
 'q10',
+'v10.8b',
+'v10.16b',
 'b11',
 'h11',
 's11',
 'd11',
 'q11',
+'v11.8b',
+'v11.16b',
 'b12',
 'h12',
 's12',
 'd12',
 'q12',
+'v12.8b',
+'v12.16b',
 'b13',
 'h13',
 's13',
 'd13',
 'q13',
+'v13.8b',
+'v13.16b',
 'b14',
 'h14',
 's14',
 'd14',
 'q14',
+'v14.8b',
+'v14.16b',
 'b15',
 'h15',
 's15',
 'd15',
 'q15',
+'v15.8b',
+'v15.16b',
 'b16',
 'h16',
 's16',
 'd16',
 'q16',
+'v16.8b',
+'v16.16b',
 'b17',
 'h17',
 's17',
 'd17',
 'q17',
+'v17.8b',
+'v17.16b',
 'b18',
 'h18',
 's18',
 'd18',
 'q18',
+'v18.8b',
+'v18.16b',
 'b19',
 'h19',
 's19',
 'd19',
 'q19',
+'v19.8b',
+'v19.16b',
 'b20',
 'h20',
 's20',
 'd20',
 'q20',
+'v20.8b',
+'v20.16b',
 'b21',
 'h21',
 's21',
 'd21',
 'q21',
+'v21.8b',
+'v21.16b',
 'b22',
 'h22',
 's22',
 'd22',
 'q22',
+'v22.8b',
+'v22.16b',
 'b23',
 'h23',
 's23',
 'd23',
 'q23',
+'v23.8b',
+'v23.16b',
 'b24',
 'h24',
 's24',
 'd24',
 'q24',
+'v24.8b',
+'v24.16b',
 'b25',
 'h25',
 's25',
 'd25',
 'q25',
+'v25.8b',
+'v25.16b',
 'b26',
 'h26',
 's26',
 'd26',
 'q26',
+'v26.8b',
+'v26.16b',
 'b27',
 'h27',
 's27',
 'd27',
 'q27',
+'v27.8b',
+'v27.16b',
 'b28',
 'h28',
 's28',
 'd28',
 'q28',
+'v28.8b',
+'v28.16b',
 'b29',
 'h29',
 's29',
 'd29',
 'q29',
+'v29.8b',
+'v29.16b',
 'b30',
 'h30',
 's30',
 'd30',
 'q30',
+'v30.8b',
+'v30.16b',
 'b31',
 'h31',
 's31',
 'd31',
 'q31',
+'v31.8b',
+'v31.16b',
 'nzcv',
 'fpcr',
 'fpsr',

+ 64 - 0
compiler/aarch64/ra64sup.inc

@@ -71,161 +71,225 @@ RS_H0 = $00;
 RS_S0 = $00;
 RS_D0 = $00;
 RS_Q0 = $00;
+RS_V08B = $00;
+RS_V016B = $00;
 RS_B1 = $01;
 RS_H1 = $01;
 RS_S1 = $01;
 RS_D1 = $01;
 RS_Q1 = $01;
+RS_V18B = $01;
+RS_V116B = $01;
 RS_B2 = $02;
 RS_H2 = $02;
 RS_S2 = $02;
 RS_D2 = $02;
 RS_Q2 = $02;
+RS_V28B = $02;
+RS_V216B = $02;
 RS_B3 = $03;
 RS_H3 = $03;
 RS_S3 = $03;
 RS_D3 = $03;
 RS_Q3 = $03;
+RS_V38B = $03;
+RS_V316B = $03;
 RS_B4 = $04;
 RS_H4 = $04;
 RS_S4 = $04;
 RS_D4 = $04;
 RS_Q4 = $04;
+RS_V48B = $04;
+RS_V416B = $04;
 RS_B5 = $05;
 RS_H5 = $05;
 RS_S5 = $05;
 RS_D5 = $05;
 RS_Q5 = $05;
+RS_V58B = $05;
+RS_V516B = $05;
 RS_B6 = $06;
 RS_H6 = $06;
 RS_S6 = $06;
 RS_D6 = $06;
 RS_Q6 = $06;
+RS_V68B = $06;
+RS_V616B = $06;
 RS_B7 = $07;
 RS_H7 = $07;
 RS_S7 = $07;
 RS_D7 = $07;
 RS_Q7 = $07;
+RS_V78B = $07;
+RS_V716B = $07;
 RS_B8 = $08;
 RS_H8 = $08;
 RS_S8 = $08;
 RS_D8 = $08;
 RS_Q8 = $08;
+RS_V88B = $08;
+RS_V816B = $08;
 RS_B9 = $09;
 RS_H9 = $09;
 RS_S9 = $09;
 RS_D9 = $09;
 RS_Q9 = $09;
+RS_V98B = $09;
+RS_V916B = $09;
 RS_B10 = $0A;
 RS_H10 = $0A;
 RS_S10 = $0A;
 RS_D10 = $0A;
 RS_Q10 = $0A;
+RS_V108B = $0A;
+RS_V1016B = $0A;
 RS_B11 = $0B;
 RS_H11 = $0B;
 RS_S11 = $0B;
 RS_D11 = $0B;
 RS_Q11 = $0B;
+RS_V118B = $0B;
+RS_V1116B = $0B;
 RS_B12 = $0C;
 RS_H12 = $0C;
 RS_S12 = $0C;
 RS_D12 = $0C;
 RS_Q12 = $0C;
+RS_V128B = $0C;
+RS_V1216B = $0C;
 RS_B13 = $0D;
 RS_H13 = $0D;
 RS_S13 = $0D;
 RS_D13 = $0D;
 RS_Q13 = $0D;
+RS_V138B = $0D;
+RS_V1316B = $0D;
 RS_B14 = $0E;
 RS_H14 = $0E;
 RS_S14 = $0E;
 RS_D14 = $0E;
 RS_Q14 = $0E;
+RS_V148B = $0E;
+RS_V1416B = $0E;
 RS_B15 = $0F;
 RS_H15 = $0F;
 RS_S15 = $0F;
 RS_D15 = $0F;
 RS_Q15 = $0F;
+RS_V158B = $0F;
+RS_V1516B = $0F;
 RS_B16 = $10;
 RS_H16 = $10;
 RS_S16 = $10;
 RS_D16 = $10;
 RS_Q16 = $10;
+RS_V168B = $10;
+RS_V1616B = $10;
 RS_B17 = $11;
 RS_H17 = $11;
 RS_S17 = $11;
 RS_D17 = $11;
 RS_Q17 = $11;
+RS_V178B = $11;
+RS_V1716B = $11;
 RS_B18 = $12;
 RS_H18 = $12;
 RS_S18 = $12;
 RS_D18 = $12;
 RS_Q18 = $12;
+RS_V188B = $12;
+RS_V1816B = $12;
 RS_B19 = $13;
 RS_H19 = $13;
 RS_S19 = $13;
 RS_D19 = $13;
 RS_Q19 = $13;
+RS_V198B = $13;
+RS_V1916B = $13;
 RS_B20 = $14;
 RS_H20 = $14;
 RS_S20 = $14;
 RS_D20 = $14;
 RS_Q20 = $14;
+RS_V208B = $14;
+RS_V2016B = $14;
 RS_B21 = $15;
 RS_H21 = $15;
 RS_S21 = $15;
 RS_D21 = $15;
 RS_Q21 = $15;
+RS_V218B = $15;
+RS_V2116B = $15;
 RS_B22 = $16;
 RS_H22 = $16;
 RS_S22 = $16;
 RS_D22 = $16;
 RS_Q22 = $16;
+RS_V228B = $16;
+RS_V2216B = $16;
 RS_B23 = $17;
 RS_H23 = $17;
 RS_S23 = $17;
 RS_D23 = $17;
 RS_Q23 = $17;
+RS_V238B = $17;
+RS_V2316B = $17;
 RS_B24 = $18;
 RS_H24 = $18;
 RS_S24 = $18;
 RS_D24 = $18;
 RS_Q24 = $18;
+RS_V248B = $18;
+RS_V2416B = $18;
 RS_B25 = $19;
 RS_H25 = $19;
 RS_S25 = $19;
 RS_D25 = $19;
 RS_Q25 = $19;
+RS_V258B = $19;
+RS_V2516B = $19;
 RS_B26 = $1A;
 RS_H26 = $1A;
 RS_S26 = $1A;
 RS_D26 = $1A;
 RS_Q26 = $1A;
+RS_V268B = $1A;
+RS_V2616B = $1A;
 RS_B27 = $1B;
 RS_H27 = $1B;
 RS_S27 = $1B;
 RS_D27 = $1B;
 RS_Q27 = $1B;
+RS_V278B = $1B;
+RS_V2716B = $1B;
 RS_B28 = $1C;
 RS_H28 = $1C;
 RS_S28 = $1C;
 RS_D28 = $1C;
 RS_Q28 = $1C;
+RS_V288B = $1C;
+RS_V2816B = $1C;
 RS_B29 = $1D;
 RS_H29 = $1D;
 RS_S29 = $1D;
 RS_D29 = $1D;
 RS_Q29 = $1D;
+RS_V298B = $1D;
+RS_V2916B = $1D;
 RS_B30 = $1E;
 RS_H30 = $1E;
 RS_S30 = $1E;
 RS_D30 = $1E;
 RS_Q30 = $1E;
+RS_V308B = $1E;
+RS_V3016B = $1E;
 RS_B31 = $1F;
 RS_H31 = $1F;
 RS_S31 = $1F;
 RS_D31 = $1F;
 RS_Q31 = $1F;
+RS_V318B = $1F;
+RS_V3116B = $1F;
 RS_NZCV = $00;
 RS_FPCR = $01;
 RS_FPSR = $02;

+ 2 - 0
compiler/aarch64/racpu.pas

@@ -67,6 +67,8 @@ unit racpu;
       begin
         if ops<1 then
           internalerror(2014122001);
+        if (ops=1) and (operands[1].opr.typ=OPR_REFERENCE) then
+          exit(OS_NO);
         if operands[1].opr.typ<>OPR_REGISTER then
           internalerror(2014122002);
         result:=reg_cgsize(operands[1].opr.reg);

+ 235 - 6
compiler/aarch64/racpugas.pas

@@ -28,14 +28,21 @@ Unit racpugas;
 
     uses
       raatt,racpu,
+      aasmtai,
       cpubase;
 
     type
+
+      { taarch64attreader }
+
       taarch64attreader = class(tattreader)
         actoppostfix : TOpPostfix;
+        actsehdirective : TAsmSehDirective;
         function is_asmopcode(const s: string):boolean;override;
         function is_register(const s:string):boolean;override;
+        function is_targetdirective(const s: string): boolean;override;
         procedure handleopcode;override;
+        procedure handletargetdirective; override;
         procedure BuildReference(oper: taarch64operand; is64bit: boolean);
         procedure BuildOperand(oper: taarch64operand; is64bit: boolean);
         function TryBuildShifterOp(instr: taarch64instruction; opnr: longint) : boolean;
@@ -53,7 +60,7 @@ Unit racpugas;
       cutils,
       { global }
       globtype,verbose,
-      systems,aasmbase,aasmtai,aasmdata,aasmcpu,
+      systems,aasmbase,aasmdata,aasmcpu,
       { symtable }
       symconst,symsym,symdef,
       procinfo,
@@ -98,6 +105,46 @@ Unit racpugas;
       end;
 
 
+    const
+      { Aarch64 subset of SEH directives that is_targetdirective accepts in
+        inline assembler. .seh_proc, .seh_endproc and .seh_endepilogue are
+        excluded because they are generated automatically when needed. }
+      recognized_directives: set of TAsmSehDirective=[
+        ash_endprologue,ash_handler,ash_handlerdata,
+        ash_stackalloc,ash_nop,ash_savefplr,ash_savefplr_x,
+        ash_savereg,ash_savereg_x,ash_saveregp,ash_saveregp_x,
+        ash_savefreg,ash_savefreg_x,ash_savefregp,ash_savefregp_x,
+        ash_setfp,ash_addfp
+      ];
+
+
+    function taarch64attreader.is_targetdirective(const s: string): boolean;
+      { Checks whether s names one of the SEH directives listed in
+        recognized_directives. On a match the directive is remembered in
+        actsehdirective. The result is always false on targets other than
+        aarch64-win64, and false (after reporting
+        asmr_e_seh_in_pure_asm_only) when the current procedure is not a
+        pure assembler routine. }
+      var
+        directive: TAsmSehDirective;
+      begin
+        result:=false;
+        if target_info.system<>system_aarch64_win64 then
+          exit;
+
+        { look for the first recognized directive whose name equals s }
+        for directive:=low(TAsmSehDirective) to high(TAsmSehDirective) do
+          if (directive in recognized_directives) and
+             (s=sehdirectivestr[directive]) then
+            begin
+              actsehdirective:=directive;
+              result:=true;
+              break;
+            end;
+        if not result then
+          exit;
+
+        { SEH directives are only allowed in pure assembler routines }
+        if not (po_assembler in current_procinfo.procdef.procoptions) then
+          begin
+            Message(asmr_e_seh_in_pure_asm_only);
+            result:=false;
+          end;
+      end;
+
+
     procedure taarch64attreader.ReadSym(oper: taarch64operand; is64bit: boolean);
       var
          tempstr, mangledname : string;
@@ -461,7 +508,7 @@ Unit racpugas;
 
       const
         shiftmode2str: array[SM_LSL..SM_SXTX] of string[4] =
-          ('LSL','LSR','ASR',
+          ('LSL','LSR','ASR','ROR',
            'UXTB','UXTH','UXTW','UXTX',
            'SXTB','SXTH','SXTW','SXTX');
       var
@@ -526,7 +573,7 @@ Unit racpugas;
           else
             ;
         end;
-        result:=C_None;;
+        result:=C_None;
       end;
 
 
@@ -565,7 +612,8 @@ Unit racpugas;
                oper.opr.symbol:=hl;
              end
             else if (actopcode=A_ADR) or
-               (actopcode=A_ADRP) then
+               (actopcode=A_ADRP) or
+               (actopcode=A_LDR) then
               begin
                 oper.InitRef;
                 MaybeAddGotAddrMode;
@@ -790,8 +838,11 @@ Unit racpugas;
                           else
                             Message1(sym_e_unknown_id,expr);
                         end
-                       else
-                         MaybeAddGotAddrMode;
+                       else if oper.opr.typ<>OPR_LOCAL then
+                         begin
+                           oper.InitRef;
+                           MaybeAddGotAddrMode;
+                         end;
                      end;
                   end;
                   if actasmtoken=AS_DOT then
@@ -1031,6 +1082,184 @@ Unit racpugas;
       end;
 
 
+    procedure taarch64attreader.handletargetdirective;
+      { Parses the operands of the SEH directive stored in actsehdirective
+        (set by is_targetdirective) and appends the matching
+        cai_seh_directive to curlist. The current token must be
+        AS_TARGET_DIRECTIVE on entry. Invalid registers or offsets are
+        reported through the asmr_e_bad_seh_directive_* messages; after a
+        bad offset no directive is emitted. Marks the current procedure as
+        having unwind info. }
+
+      { Returns the offset limit for the given directive: the largest
+        allowed value for the plain forms, the smallest (negative) allowed
+        value for the pre-indexed "_x" forms. }
+      function maxoffset(ash:TAsmSehDirective):aint;
+        begin
+          case ash of
+            ash_savefplr,
+            ash_saveregp,
+            ash_savereg,
+            ash_savefregp,
+            ash_savefreg:
+              result:=504;
+            ash_savefplr_x,
+            ash_saveregp_x,
+            ash_savefregp_x:
+              result:=-512;
+            ash_savereg_x,
+            ash_savefreg_x:
+              result:=-256;
+            ash_addfp:
+              result:=2040;
+            else
+              internalerror(2020041204);
+          end;
+        end;
+
+      { Validates hnum against the range of ash and emits the directive.
+        With neg set the offset must lie in [maxoffset(ash)..0] and is
+        stored negated (i.e. as a positive value); otherwise it must lie in
+        [0..maxoffset(ash)]. In both cases it must be a multiple of 8.
+        hreg=NR_NO selects the offset-only form of the directive. }
+      procedure add_reg_with_offset(ash:TAsmSehDirective;hreg:tregister;hnum:aint;neg:boolean);
+        begin
+          if (neg and ((hnum>0) or (hnum<maxoffset(ash)) or (((-hnum) and $7)<>0))) or
+              (not neg and ((hnum<0) or (hnum>maxoffset(ash)) or ((hnum and $7)<>0))) then
+            Message1(asmr_e_bad_seh_directive_offset,sehdirectivestr[actsehdirective])
+          else
+            begin
+              if neg then
+                hnum:=-hnum;
+              if hreg=NR_NO then
+                curlist.concat(cai_seh_directive.create_offset(actsehdirective,hnum))
+              else
+                curlist.concat(cai_seh_directive.create_reg_offset(actsehdirective,hreg,hnum));
+            end;
+        end;
+
+      var
+        hreg,
+        hreg2 : TRegister;
+        hnum : aint;
+        flags : integer;
+        ai : tai_seh_directive;
+        hs : string;
+        err : boolean;
+      begin
+        if actasmtoken<>AS_TARGET_DIRECTIVE then
+          InternalError(2020033102);
+        Consume(AS_TARGET_DIRECTIVE);
+        Include(current_procinfo.flags,pi_has_unwind_info);
+
+        case actsehdirective of
+          { directives without operands }
+          ash_nop,
+          ash_setfp,
+          ash_endprologue,
+          ash_handlerdata:
+            curlist.concat(cai_seh_directive.create(actsehdirective));
+
+          { .seh_handler name[,@except][,@unwind] }
+          ash_handler:
+            begin
+              hs:=actasmpattern;
+              Consume(AS_ID);
+              flags:=0;
+              err:=false;
+              while actasmtoken=AS_COMMA do
+                begin
+                  Consume(AS_COMMA);
+                  if actasmtoken=AS_AT then
+                    begin
+                      Consume(AS_AT);
+                      if actasmtoken=AS_ID then
+                        begin
+                          uppervar(actasmpattern);
+                          if actasmpattern='EXCEPT' then
+                            flags:=flags or 1
+                          else if actasmpattern='UNWIND' then
+                            flags:=flags or 2
+                          else
+                            err:=true;
+                          Consume(AS_ID);
+                        end
+                      else
+                        err:=true;
+                    end
+                  else
+                    err:=true;
+                  if err then
+                    begin
+                      Message(asmr_e_syntax_error);
+                      RecoverConsume(false);
+                      exit;
+                    end;
+                end;
+
+              ai:=cai_seh_directive.create_name(ash_handler,hs);
+              ai.data.flags:=flags;
+              curlist.concat(ai);
+            end;
+          { offset-only directives. ash_addfp is accepted by
+            is_targetdirective and has a limit in maxoffset, so it has to be
+            handled here instead of falling through to the internal error
+            below.
+            NOTE(review): assumes cai_seh_directive.create_offset accepts
+            ash_addfp - confirm against aasmtai. }
+          ash_savefplr,
+          ash_savefplr_x,
+          ash_addfp:
+            begin
+              hnum:=BuildConstExpression(false,false);
+              add_reg_with_offset(actsehdirective,NR_NO,hnum,actsehdirective=ash_savefplr_x);
+            end;
+          { single integer register: must be a whole register with
+            super register number >= 19 }
+          ash_savereg,
+          ash_savereg_x:
+            begin
+              hreg:=actasmregister;
+              Consume(AS_REGISTER);
+              if (getregtype(hreg)<>R_INTREGISTER) or (getsubreg(hreg)<>R_SUBWHOLE) or (getsupreg(hreg)<19) then
+                Message1(asmr_e_bad_seh_directive_register,sehdirectivestr[actsehdirective]);
+              Consume(AS_COMMA);
+              hnum:=BuildConstExpression(false,false);
+              add_reg_with_offset(actsehdirective,hreg,hnum,actsehdirective=ash_savereg_x);
+            end;
+          { integer register pair: the second register must directly
+            follow the first one }
+          ash_saveregp,
+          ash_saveregp_x:
+            begin
+              hreg:=actasmregister;
+              consume(AS_REGISTER);
+              if (getregtype(hreg)<>R_INTREGISTER) or (getsubreg(hreg)<>R_SUBWHOLE) or (getsupreg(hreg)<19) then
+                Message1(asmr_e_bad_seh_directive_register,sehdirectivestr[actsehdirective]);
+              consume(AS_COMMA);
+              hreg2:=actasmregister;
+              consume(AS_REGISTER);
+              if (getregtype(hreg2)<>R_INTREGISTER) or (getsubreg(hreg2)<>R_SUBWHOLE) or (getsupreg(hreg2)<>getsupreg(hreg)+1) then
+                Message1(asmr_e_bad_seh_directive_register,sehdirectivestr[actsehdirective]);
+              consume(AS_COMMA);
+              hnum:=BuildConstExpression(false,false);
+              add_reg_with_offset(actsehdirective,hreg,hnum,actsehdirective=ash_saveregp_x);
+            end;
+          { single MM register: must be a whole register with super
+            register number >= 8 }
+          ash_savefreg,
+          ash_savefreg_x:
+            begin
+              hreg:=actasmregister;
+              Consume(AS_REGISTER);
+              if (getregtype(hreg)<>R_MMREGISTER) or (getsubreg(hreg)<>R_SUBWHOLE) or (getsupreg(hreg)<8) then
+                Message1(asmr_e_bad_seh_directive_register,sehdirectivestr[actsehdirective]);
+              Consume(AS_COMMA);
+              hnum:=BuildConstExpression(false,false);
+              add_reg_with_offset(actsehdirective,hreg,hnum,actsehdirective=ash_savefreg_x);
+            end;
+          { MM register pair: the second register must directly follow
+            the first one }
+          ash_savefregp,
+          ash_savefregp_x:
+            begin
+              hreg:=actasmregister;
+              consume(AS_REGISTER);
+              if (getregtype(hreg)<>R_MMREGISTER) or (getsubreg(hreg)<>R_SUBWHOLE) or (getsupreg(hreg)<8) then
+                Message1(asmr_e_bad_seh_directive_register,sehdirectivestr[actsehdirective]);
+              consume(AS_COMMA);
+              hreg2:=actasmregister;
+              consume(AS_REGISTER);
+              if (getregtype(hreg2)<>R_MMREGISTER) or (getsubreg(hreg2)<>R_SUBWHOLE) or (getsupreg(hreg2)<>getsupreg(hreg)+1) then
+                Message1(asmr_e_bad_seh_directive_register,sehdirectivestr[actsehdirective]);
+              consume(AS_COMMA);
+              hnum:=BuildConstExpression(false,false);
+              add_reg_with_offset(actsehdirective,hreg,hnum,actsehdirective=ash_savefregp_x);
+            end;
+          { stack allocation size: 0..$FFFFFF and a multiple of 8 }
+          ash_stackalloc:
+            begin
+              hnum:=BuildConstExpression(false,false);
+              if (hnum<0) or (hnum>$FFFFFF) or ((hnum and 7)<>0) then
+                Message1(asmr_e_bad_seh_directive_offset,sehdirectivestr[ash_stackalloc])
+              else
+                curlist.concat(cai_seh_directive.create_offset(ash_stackalloc,hnum));
+            end;
+          else
+            InternalError(2020033103);
+        end;
+        { anything left on the line is an error; Consume reports it }
+        if actasmtoken<>AS_SEPARATOR then
+          Consume(AS_SEPARATOR);
+      end;
+
+
 {*****************************************************************************
                                      Initialize
 *****************************************************************************}

+ 10 - 0
compiler/aarch64/rgcpu.pas

@@ -36,6 +36,7 @@ unit rgcpu;
       trgcpu=class(trgobj)
         procedure do_spill_read(list: TAsmList; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister); override;
         procedure do_spill_written(list: TAsmList; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister); override;
+        function get_spill_subreg(r: tregister): tsubregister; override;
        protected
         procedure do_spill_op(list: tasmlist; op: tasmop; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister);
       end;
@@ -51,6 +52,15 @@ implementation
       verbose,cutils,
       cgobj;
 
+    function  trgcpu.get_spill_subreg(r:tregister) : tsubregister;
+      { MM registers are spilled using their own subregister; every other
+        register type uses defaultsub. }
+      begin
+        if getregtype(r)=R_MMREGISTER then
+          result:=getsubreg(r)
+        else
+          result:=defaultsub;
+      end;
+
+
     procedure trgcpu.do_spill_read(list: TAsmList; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister);
       begin
         do_spill_op(list,A_LDR,pos,spilltemp,tempreg,orgsupreg);

+ 48 - 0
compiler/aarch64/tripletcpu.pas

@@ -0,0 +1,48 @@
+{
+    Copyright (c) 2020 by Jonas Maebe
+
+    Construct the cpu part of the triplet
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit tripletcpu;
+
+{$i fpcdefs.inc}
+
+interface
+
+uses
+  globtype;
+
+function tripletcpustr(tripletstyle: ttripletstyle): ansistring;
+
+implementation
+
+uses
+  globals, systems, cpuinfo;
+
+function tripletcpustr(tripletstyle: ttripletstyle): ansistring;
+  { Returns the cpu component of the target triplet: 'arm64' for Darwin
+    targets, 'aarch64' for all others. tripletstyle is not consulted. }
+  begin
+    result:='aarch64';
+    if target_info.system in systems_darwin then
+      result:='arm64';
+  end;
+
+
+end.
+

+ 22 - 4
compiler/aasmbase.pas

@@ -170,13 +170,18 @@ interface
          { initial heap segment for 16-bit DOS }
          sec_heap,
          { dwarf based/gcc style exception handling }
-         sec_gcc_except_table
+         sec_gcc_except_table,
+         sec_arm_attribute
        );
 
        TObjCAsmSectionType = sec_objc_class..sec_objc_protolist;
 
        TAsmSectionOrder = (secorder_begin,secorder_default,secorder_end);
 
+       TSectionFlag = (SF_A,SF_W,SF_X);
+       TSectionFlags = set of TSectionFlag;
+       TSectionProgbits = (SPB_None,SPB_PROGBITS,SPB_NOBITS,SPB_NOTE,SPB_ARM_ATTRIBUTES);
+
        TAsmSymbol = class(TFPHashObject)
        private
          { this need to be incremented with every symbol loading into the
@@ -224,6 +229,7 @@ interface
          labeltype : TAsmLabelType;
          is_set    : boolean;
          is_public : boolean;
+         defined_in_asmstatement : boolean;
          constructor Createlocal(AList: TFPHashObjectList; nr: longint; ltyp: TAsmLabelType);
          constructor Createstatic(AList: TFPHashObjectList; nr: longint; ltyp: TAsmLabelType);
          constructor Createglobal(AList: TFPHashObjectList; const modulename: TSymStr; nr: longint; ltyp: TAsmLabelType);
@@ -234,7 +240,7 @@ interface
     function create_smartlink_library:boolean;inline;
     function create_smartlink:boolean;inline;
 
-    function ReplaceForbiddenAsmSymbolChars(const s: ansistring): ansistring;
+    function ApplyAsmSymbolRestrictions(const s: ansistring): ansistring;
 
     { dummy default noop callback }
     procedure default_global_used;
@@ -251,7 +257,7 @@ interface
 implementation
 
     uses
-      verbose;
+      verbose,fpccrc;
 
 
     function create_smartlink_sections:boolean;inline;
@@ -282,16 +288,28 @@ implementation
       end;
 
 
-    function ReplaceForbiddenAsmSymbolChars(const s: ansistring): ansistring;
+    function ApplyAsmSymbolRestrictions(const s: ansistring): ansistring;
+      { Returns a copy of s adapted to the restrictions of the target
+        assembler: every '$' is replaced by target_asm.dollarsign and, if
+        the target has a maximum label length (labelmaxlen<>-1) that the
+        name exceeds, the name is shortened to fit. }
       var
         i : longint;
         rchar: char;
+        crc: Cardinal;
+        charstoremove: integer;
       begin
         Result:=s;
         rchar:=target_asm.dollarsign;
         for i:=1 to Length(Result) do
           if Result[i]='$' then
             Result[i]:=rchar;
+        if (target_asm.labelmaxlen<>-1) and (Length(Result)>target_asm.labelmaxlen) then
+          begin
+            { the checksum is taken over the full name, after the '$'
+              replacement and before anything is cut out }
+            crc:=0;
+            crc:=UpdateCrc32(crc,Result[1],Length(Result));
+            { 13 = length of the prefix added below:
+              '_' + dollarsign + 'CRC' + 8 hex digits }
+            charstoremove:=Length(Result)-target_asm.labelmaxlen+13;
+            { cut the surplus characters out of the middle of the name }
+            Delete(Result,(Length(Result)-charstoremove) div 2,charstoremove);
+            Result:='_'+target_asm.dollarsign+'CRC'+hexstr(crc,8)+Result;
+            { sanity check: the shortened name must now fit }
+            if Length(Result)>target_asm.labelmaxlen then
+              Internalerror(2020042501);
+          end;
       end;
 
 

+ 11 - 10
compiler/aasmcnst.pas

@@ -384,7 +384,7 @@ type
         b) the def of the record should be automatically constructed based on
            the types of the emitted fields
 
-        packrecords: same as "pacrecords x"
+        packrecords: same as "packrecords x"
         recordalign: specify the (minimum) alignment of the start of the record
           (no equivalent in source code), used as an alternative for explicit
           align statements. Use "1" if it should be calculated based on the
@@ -512,7 +512,7 @@ implementation
      cutils,
      verbose,globals,systems,widestr,
      fmodule,
-     symtable,defutil;
+     symtable,symutil,defutil;
 
 {****************************************************************************
                        taggregateinformation
@@ -589,8 +589,7 @@ implementation
             repeat
               inc(i);
               sym:=tsym(tabstractrecorddef(def).symtable.symlist[i]);
-            until (sym.typ=fieldvarsym) and
-              not(sp_static in sym.symoptions);
+            until is_normal_fieldvarsym(sym);
             curfield:=tfieldvarsym(sym);
             nextoffset:=curfield.fieldoffset;
             curindex:=i;
@@ -828,8 +827,6 @@ implementation
 
 
    destructor tai_aggregatetypedconst.destroy;
-     var
-       ai: tai_abstracttypedconst;
      begin
        fvalues.free;
        inherited destroy;
@@ -1151,7 +1148,9 @@ implementation
    class function ttai_typedconstbuilder.get_string_symofs(typ: tstringtype; winlikewidestring: boolean): pint;
      begin
        { darwin's linker does not support negative offsets }
-       if not(target_info.system in systems_darwin) then
+       if not(target_info.system in systems_darwin) and
+          { it seems that clang's assembler has a bug with the ADRP instruction... }
+          (target_info.system<>system_aarch64_win64) then
          result:=0
        else
          result:=get_string_header_size(typ,winlikewidestring);
@@ -1161,7 +1160,9 @@ implementation
    class function ttai_typedconstbuilder.get_dynarray_symofs:pint;
      begin
        { darwin's linker does not support negative offsets }
-       if not (target_info.system in systems_darwin) then
+       if not (target_info.system in systems_darwin) and
+          { it seems that clang's assembler has a bug with the ADRP instruction... }
+          (target_info.system<>system_aarch64_win64) then
          result:=0
        else
          result:=get_dynarray_header_size;
@@ -1843,7 +1844,7 @@ implementation
 
    procedure ttai_typedconstbuilder.emit_procdef_const(pd: tprocdef);
      begin
-       emit_tai(Tai_const.Createname(pd.mangledname,AT_FUNCTION,0),cprocvardef.getreusableprocaddr(pd));
+       emit_tai(Tai_const.Createname(pd.mangledname,AT_FUNCTION,0),cprocvardef.getreusableprocaddr(pd,pc_address_only));
      end;
 
 
@@ -2086,7 +2087,7 @@ implementation
          begin
            sym:=search_struct_member_no_helper(tabstractrecorddef(curdef),fields[i]);
            if not assigned(sym) or
-              (sym.typ<>fieldvarsym) or
+              not is_normal_fieldvarsym(sym) or
               ((i<>high(fields)) and
                not(tfieldvarsym(sym).vardef.typ in [objectdef,recorddef])) then
              internalerror(2015071505);

+ 1 - 1
compiler/aasmdef.pas

@@ -56,7 +56,7 @@ function TAsmDataDef.DefineAsmSymbolByClass(symclass: TAsmSymbolClass; const s:
     result:=DefineAsmSymbolByClassBase(symclass,s,_bind,_typ,def,wasdefined);
     { define the indirect asmsymbol if necessary }
     if not wasdefined and
-       (_bind in [AB_GLOBAL,AB_COMMON]) and
+       (_bind in [AB_GLOBAL,AB_COMMON,AB_PRIVATE_EXTERN]) and
        (_typ<>AT_DATA_NOINDIRECT) and
        (((_typ=AT_DATA) and
          (tf_supports_packages in target_info.flags) and

+ 114 - 26
compiler/aasmtai.pas

@@ -98,7 +98,8 @@ interface
           { SEH directives used in ARM,MIPS and x86_64 COFF targets }
           ait_seh_directive,
           { Dwarf CFI directive }
-          ait_cfi
+          ait_cfi,
+          ait_eabi_attribute
           );
 
         taiconst_type = (
@@ -155,10 +156,13 @@ interface
           aitconst_got,
           { offset of symbol itself from GOT }
           aitconst_gotoff_symbol,
+          { offset in TLS block }
+          aitconst_dtpoff,
           { ARM TLS code }
           aitconst_gottpoff,
-          aitconst_tpoff
-
+          aitconst_tpoff,
+          aitconst_tlsgd,
+          aitconst_tlsdesc
         );
 
         tairealconsttype = (
@@ -238,7 +242,8 @@ interface
           'local',
 {$endif}
           'cfi',
-          'seh_directive'
+          'seh_directive',
+          'eabi_attribute'
           );
 
     type
@@ -349,7 +354,8 @@ interface
                      ait_importexport, ait_local,
 {$endif wasm}
                      ait_seh_directive,
-                     ait_cfi
+                     ait_cfi,
+                     ait_eabi_attribute
                     ];
 
 
@@ -407,7 +413,10 @@ interface
           ash_endprologue,ash_handler,ash_handlerdata,
           ash_eh,ash_32,ash_no32,
           ash_setframe,ash_stackalloc,ash_pushreg,
-          ash_savereg,ash_savexmm,ash_pushframe,
+          ash_savereg,ash_savereg_x,ash_saveregp,ash_saveregp_x,
+          ash_savexmm,ash_savefreg,ash_savefreg_x,ash_savefregp,ash_savefregp_x,ash_pushframe,
+          ash_setfp,ash_addfp,ash_savefplr,ash_savefplr_x,
+          ash_nop,
           ash_pushnv,ash_savenv
         );
 
@@ -448,7 +457,10 @@ interface
         '.seh_endprologue','.seh_handler','.seh_handlerdata',
         '.seh_eh','.seh_32','seh_no32',
         '.seh_setframe','.seh_stackalloc','.seh_pushreg',
-        '.seh_savereg','.seh_savexmm','.seh_pushframe',
+        '.seh_savereg','.seh_savereg_x','.seh_saveregp','.seh_saveregp_x',
+        '.seh_savexmm','.seh_savefreg','.seh_savefreg_x','.seh_savefregp','.seh_savefregp_x','.seh_pushframe',
+        '.seh_setfp','.seh_addfp','.seh_savefplr','.seh_savefplr_x',
+        '.seh_nop',
         '.pushnv','.savenv'
       );
       symbolpairkindstr: array[TSymbolPairKind] of string[11]=(
@@ -624,10 +636,6 @@ interface
           function getcopy:tlinkedlistitem;override;
        end;
 
-       type
-         TSectionFlags = (SF_None,SF_A,SF_W,SF_X);
-         TSectionProgbits = (SPB_None,SPB_PROGBITS,SPB_NOBITS);
-
        { Generates a section / segment directive }
        tai_section = class(tai)
           sectype  : TAsmSectiontype;
@@ -656,9 +664,9 @@ interface
           is_global : boolean;
           sym       : tasmsymbol;
           size      : asizeint;
-          constructor Create(const _name : string;_size : asizeint; def: tdef);
-          constructor Create_hidden(const _name : string;_size : asizeint; def: tdef);
-          constructor Create_global(const _name : string;_size : asizeint; def: tdef);
+          constructor Create(const _name: string; _size: asizeint; def: tdef; _typ: Tasmsymtype);
+          constructor Create_hidden(const _name: string; _size: asizeint; def: tdef; _typ: Tasmsymtype);
+          constructor Create_global(const _name: string; _size: asizeint; def: tdef; _typ: Tasmsymtype);
           constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
           procedure ppuwrite(ppufile:tcompilerppufile);override;
           procedure derefimpl;override;
@@ -999,6 +1007,18 @@ interface
           procedure ppuwrite(ppufile:tcompilerppufile);override;
         end;
 
+        { kind of payload carried by a tai_eabi_attribute; the constructors set
+          eattrtype_dword for a numeric value and eattrtype_ntbs for a string
+          value (presumably "null-terminated byte string" - confirm) }
+        teattrtyp = (eattrtype_none,eattrtype_dword,eattrtype_ntbs);
+        { assembler list entry describing one attribute: a tag plus either a
+          dword value or a string value }
+        tai_eabi_attribute = class(tai)
+          { selects which of value/valuestr is meaningful }
+          eattr_typ : teattrtyp;
+          tag,value : dword;
+          { only assigned by the string constructor }
+          valuestr : pstring;
+          constructor create(atag,avalue : dword);
+          constructor create(atag : dword;const avalue : string);
+          destructor destroy;override;
+          constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
+          procedure ppuwrite(ppufile:tcompilerppufile);override;
+        end;
+
+
     var
       { array with all class types for tais }
       aiclass : taiclassarray;
@@ -1026,7 +1046,8 @@ implementation
 {$endif x86}
       SysUtils,
       verbose,
-      globals;
+      globals,
+      ppu;
 
     const
       pputaimarker = 254;
@@ -1112,7 +1133,7 @@ implementation
     constructor tai_symbolpair.ppuload(t: taitype; ppufile: tcompilerppufile);
       begin
         inherited ppuload(t,ppufile);
-        kind:=TSymbolPairKind(ppufile.getbyte);;
+        kind:=TSymbolPairKind(ppufile.getbyte);
         sym:=ppufile.getpshortstring;
         value:=ppufile.getpshortstring;
       end;
@@ -1265,6 +1286,7 @@ implementation
         sectype:=asectype;
         secalign:=Aalign;
         secorder:=Asecorder;
+        TObjData.sectiontype2progbitsandflags(sectype,secprogbits,secflags);
         name:=stringdup(Aname);
         sec:=nil;
       end;
@@ -1276,7 +1298,7 @@ implementation
         sectype:=TAsmSectiontype(ppufile.getbyte);
         secalign:=ppufile.getlongint;
         name:=ppufile.getpshortstring;
-        secflags:=TSectionFlags(ppufile.getbyte);
+        ppufile.getset(tppuset1(secflags));
         secprogbits:=TSectionProgbits(ppufile.getbyte);
         sec:=nil;
       end;
@@ -1294,7 +1316,7 @@ implementation
         ppufile.putbyte(byte(sectype));
         ppufile.putlongint(secalign);
         ppufile.putstring(name^);
-        ppufile.putbyte(byte(secflags));
+        ppufile.putset(tppuset1(secflags));
         ppufile.putbyte(byte(secprogbits));
       end;
 
@@ -1303,12 +1325,12 @@ implementation
                              TAI_DATABLOCK
  ****************************************************************************}
 
-    constructor tai_datablock.Create(const _name : string;_size : asizeint; def: tdef);
+    constructor tai_datablock.Create(const _name : string;_size : asizeint; def: tdef; _typ:Tasmsymtype);
 
       begin
          inherited Create;
          typ:=ait_datablock;
-         sym:=current_asmdata.DefineAsmSymbol(_name,AB_LOCAL,AT_DATA,def);
+         sym:=current_asmdata.DefineAsmSymbol(_name,AB_LOCAL,_typ,def);
          { keep things aligned }
          if _size<=0 then
            _size:=sizeof(aint);
@@ -1316,13 +1338,13 @@ implementation
          is_global:=false;
       end;
 
-    constructor tai_datablock.Create_hidden(const _name: string; _size: asizeint; def: tdef);
+    constructor tai_datablock.Create_hidden(const _name: string; _size: asizeint; def: tdef; _typ:Tasmsymtype);
       begin
         if tf_supports_hidden_symbols in target_info.flags then
           begin
             inherited Create;
             typ:=ait_datablock;
-            sym:=current_asmdata.DefineAsmSymbol(_name,AB_PRIVATE_EXTERN,AT_DATA,def);
+            sym:=current_asmdata.DefineAsmSymbol(_name,AB_PRIVATE_EXTERN,_typ,def);
             { keep things aligned }
             if _size<=0 then
               _size:=sizeof(aint);
@@ -1330,15 +1352,15 @@ implementation
             is_global:=true;
           end
         else
-          Create(_name,_size,def);
+          Create(_name,_size,def,_typ);
       end;
 
 
-    constructor tai_datablock.Create_global(const _name : string;_size : asizeint; def: tdef);
+    constructor tai_datablock.Create_global(const _name : string;_size : asizeint; def: tdef; _typ:Tasmsymtype);
       begin
          inherited Create;
          typ:=ait_datablock;
-         sym:=current_asmdata.DefineAsmSymbol(_name,AB_GLOBAL,AT_DATA,def);
+         sym:=current_asmdata.DefineAsmSymbol(_name,AB_GLOBAL,_typ,def);
          { keep things aligned }
          if _size<=0 then
            _size:=sizeof(aint);
@@ -2075,7 +2097,7 @@ implementation
             result:=8;
           aitconst_secrel32_symbol,
           aitconst_rva_symbol :
-            if target_info.system=system_x86_64_win64 then
+            if target_info.system in systems_peoptplus then
               result:=sizeof(longint)
             else
               result:=sizeof(pint);
@@ -2118,6 +2140,16 @@ implementation
             result:=sizeof(pint);
           aitconst_gotoff_symbol:
             result:=4;
+          aitconst_gottpoff:
+            result:=4;
+          aitconst_tlsgd:
+            result:=4;
+          aitconst_tpoff:
+            result:=4;
+          aitconst_tlsdesc:
+            result:=4;
+          aitconst_dtpoff:
+            result:=4;
           else
             internalerror(200603253);
         end;
@@ -3327,8 +3359,20 @@ implementation
         sd_offset,     { stackalloc }
         sd_reg,        { pushreg }
         sd_regoffset,  { savereg }
+        sd_regoffset,  { savereg_x }
+        sd_regoffset,  { saveregp }
+        sd_regoffset,  { saveregp_x }
         sd_regoffset,  { savexmm }
+        sd_regoffset,  { savefreg }
+        sd_regoffset,  { savefreg_x }
+        sd_regoffset,  { savefregp }
+        sd_regoffset,  { savefregp_x }
         sd_none,       { pushframe }
+        sd_none,       { setfp }
+        sd_none,       { addfp }
+        sd_offset,     { savefplr }
+        sd_offset,     { savefplr_x }
+        sd_none,       { nop }
         sd_reg,        { pushnv }
         sd_none        { savenv }
       );
@@ -3419,6 +3463,50 @@ implementation
       begin
       end;
 
+
+{****************************************************************************
+                              tai_eabi_attribute
+ ****************************************************************************}
+
+    { creates an attribute entry with tag atag and the numeric value avalue }
+    constructor tai_eabi_attribute.create(atag,avalue : dword);
+      begin
+        inherited Create;
+        typ:=ait_eabi_attribute;
+        eattr_typ:=eattrtype_dword;
+        tag:=atag;
+        value:=avalue;
+      end;
+
+
+    { creates an attribute entry with tag atag and the string value avalue;
+      the string is copied into newly allocated storage (valuestr) }
+    constructor tai_eabi_attribute.create(atag: dword; const avalue: string);
+      begin
+        inherited Create;
+        typ:=ait_eabi_attribute;
+        eattr_typ:=eattrtype_ntbs;
+        tag:=atag;
+        valuestr:=NewStr(avalue);
+      end;
+
+
+    { releases the string value owned by this attribute (if any) before
+      destroying the tai itself }
+    destructor tai_eabi_attribute.destroy;
+      begin
+        { valuestr is allocated with NewStr by the string constructor and is
+          left nil by the dword constructor; free it with the matching
+          DisposeStr so it is not leaked.
+          NOTE(review): this class does not override getcopy - confirm that
+          instances are never duplicated by a shallow copy, which would make
+          two instances share valuestr }
+        DisposeStr(valuestr);
+        Inherited Destroy;
+      end;
+
+
+    { loads an attribute entry from a ppu file; reads the inherited tai data
+      followed by tag and value, i.e. the fields ppuwrite stores, in the same
+      order }
+    constructor tai_eabi_attribute.ppuload(t:taitype;ppufile:tcompilerppufile);
+      begin
+        inherited ppuload(t,ppufile);
+        tag:=ppufile.getdword;
+        value:=ppufile.getdword;
+        { NOTE(review): ppuwrite stores neither eattr_typ nor valuestr, so they
+          cannot be restored here - confirm whether string attributes ever
+          need to be written to a ppu }
+      end;
+
+
+    { stores the attribute in a ppu file: the inherited tai data followed by
+      tag and value; eattr_typ and valuestr are not written }
+    procedure tai_eabi_attribute.ppuwrite(ppufile:tcompilerppufile);
+      begin
+        inherited ppuwrite(ppufile);
+        ppufile.putdword(tag);
+        ppufile.putdword(value);
+      end;
+
+
 {$ifdef JVM}
 
 {****************************************************************************

+ 179 - 69
compiler/aggas.pas

@@ -49,8 +49,9 @@ interface
         function sectionattrs(atype:TAsmSectiontype):string;virtual;
         function sectionattrs_coff(atype:TAsmSectiontype):string;virtual;
         function sectionalignment_aix(atype:TAsmSectiontype;secalign: longint):string;
+        function sectionflags(secflags:TSectionFlags):string;virtual;
         procedure WriteSection(atype:TAsmSectiontype;const aname:string;aorder:TAsmSectionOrder;secalign:longint;
-          secflags:TSectionFlags=SF_None;secprogbits:TSectionProgbits=SPB_None);virtual;
+          secflags:TSectionFlags=[];secprogbits:TSectionProgbits=SPB_None);virtual;
         procedure WriteExtraHeader;virtual;
         procedure WriteExtraFooter;virtual;
         procedure WriteInstruction(hp: tai);
@@ -155,10 +156,12 @@ implementation
 
       { Generic unaligned pseudo-instructions, seems ELF specific }
       use_ua_elf_systems = [system_mipsel_linux,system_mipseb_linux,system_mipsel_android,system_mipsel_embedded,system_mipseb_embedded];
-      ait_ua_elf_const2str : array[aitconst_16bit_unaligned..aitconst_64bit_unaligned]
-        of string[20]=(
-          #9'.2byte'#9,#9'.4byte'#9,#9'.8byte'#9
-        );
+      ait_ua_elf_const2str : array[aitconst_128bit..aitconst_64bit_unaligned] of string[20]=(
+        #9'.fixme128'#9,#9'.8byte'#9,#9'.4byte'#9,#9'.2byte'#9,#9'.byte'#9,
+        #9'.sleb128'#9,#9'.uleb128'#9,
+        #9'.rva'#9,#9'.secrel32'#9,#9'.8byte'#9,#9'.4byte'#9,#9'.2byte'#9,#9'.2byte'#9,
+        #9'.2byte'#9,#9'.4byte'#9,#9'.8byte'#9
+      );
 
 
 
@@ -199,7 +202,7 @@ implementation
            (atype<>sec_toc) and
            (atype<>sec_user) and
            { on embedded systems every byte counts, so smartlink bss too }
-           ((atype<>sec_bss) or (target_info.system in systems_embedded));
+           ((atype<>sec_bss) or (target_info.system in (systems_embedded+systems_freertos)));
       end;
 
     function TGNUAssembler.sectionname(atype:TAsmSectiontype;const aname:string;aorder:TAsmSectionOrder):string;
@@ -216,9 +219,9 @@ implementation
 { TODO: .data.ro not yet working}
 {$if defined(arm) or defined(riscv64) or defined(powerpc)}
           '.rodata',
-{$else arm}
+{$else defined(arm) or defined(riscv64) or defined(powerpc)}
           '.data',
-{$endif arm}
+{$endif defined(arm) or defined(riscv64) or defined(powerpc)}
           '.rodata',
           '.bss',
           '.threadvar',
@@ -273,7 +276,8 @@ implementation
           '.objc_protolist',
           '.stack',
           '.heap',
-          '.gcc_except_table'
+          '.gcc_except_table',
+          '.ARM.attributes'
         );
         secnames_pic : array[TAsmSectiontype] of string[length('__DATA, __datacoal_nt,coalesced')] = ('','',
           '.text',
@@ -333,7 +337,8 @@ implementation
           '.objc_protolist',
           '.stack',
           '.heap',
-          '.gcc_except_table'
+          '.gcc_except_table',
+          '.ARM.attributes'
         );
       var
         sep     : string[3];
@@ -407,7 +412,7 @@ implementation
     function TGNUAssembler.sectionattrs(atype:TAsmSectiontype):string;
       begin
         result:='';
-        if (target_info.system in [system_i386_win32,system_x86_64_win64]) then
+        if (target_info.system in [system_i386_win32,system_x86_64_win64,system_aarch64_win64]) then
           begin
             result:=sectionattrs_coff(atype);
           end;
@@ -429,7 +434,10 @@ implementation
 
           { TODO: these need a fix to become read-only }
           sec_rodata, sec_rodata_norel:
-            result:='d';
+            if target_info.system=system_aarch64_win64 then
+              result:='r'
+            else
+              result:='d';
 
           sec_bss:
             result:='b';
@@ -452,6 +460,24 @@ implementation
       end;
 
 
+    { Builds the flag characters for a section directive from secflags:
+      appends 'a' for SF_A, 'w' for SF_W and 'x' for SF_X; any other set
+      member contributes nothing. The result is '' for an empty set and does
+      not include the surrounding quotes. }
+    function TGNUAssembler.sectionflags(secflags:TSectionFlags):string;
+      var
+        secflag : TSectionFlag;
+      begin
+        result:='';
+        for secflag in secflags do begin
+          case secflag of
+            SF_A:
+              result:=result+'a';
+            SF_W:
+              result:=result+'w';
+            SF_X:
+              result:=result+'x';
+          end;
+        end;
+      end;
+
+
     function TGNUAssembler.sectionalignment_aix(atype:TAsmSectiontype;secalign: longint): string;
       var
         l: longint;
@@ -468,11 +494,16 @@ implementation
       end;
 
 
-    procedure TGNUAssembler.WriteSection(atype:TAsmSectiontype;const aname:string;aorder:TAsmSectionOrder;secalign:longint;secflags:TSectionFlags=SF_None;secprogbits:TSectionProgbits=SPB_None);
+    procedure TGNUAssembler.WriteSection(atype:TAsmSectiontype;const aname:string;aorder:TAsmSectionOrder;secalign:longint;secflags:TSectionFlags=[];secprogbits:TSectionProgbits=SPB_None);
       var
         s : string;
+        secflag: TSectionFlag;
+        usesectionprogbits,
+        usesectionflags: boolean;
       begin
         writer.AsmLn;
+        usesectionflags:=false;
+        usesectionprogbits:=false;
         case target_info.system of
          system_i386_OS2,
          system_i386_EMX: ;
@@ -481,14 +512,36 @@ implementation
            begin
              { ... but vasm is GAS compatible on amiga/atari, and supports named sections }
              if create_smartlink_sections then
-               writer.AsmWrite('.section ');
+               begin
+                 writer.AsmWrite('.section ');
+                 usesectionflags:=true;
+                 usesectionprogbits:=true;
+                 { hack, to avoid linker warnings on Amiga/Atari, when vlink merges
+                   rodata sections into data sections. Also avoid the warning when
+                   the linker realizes the code section cannot be write protected and
+                   adds the writable bit. }
+                 if atype in [sec_code,sec_rodata,sec_rodata_norel] then
+                   include(secflags,SF_W);
+               end;
+           end;
+         system_i386_win32,
+         system_x86_64_win64,
+         system_i386_wince,
+         system_arm_wince,
+         system_aarch64_win64:
+           begin
+             { according to the GNU AS guide AS for COFF does not support the
+               progbits }
+             writer.AsmWrite('.section ');
+             usesectionflags:=true;
            end;
          system_powerpc_darwin,
          system_i386_darwin,
          system_i386_iphonesim,
          system_powerpc64_darwin,
          system_x86_64_darwin,
-         system_arm_darwin,
+         system_arm_ios,
+         system_aarch64_ios,
          system_aarch64_darwin,
          system_x86_64_iphonesim,
          system_powerpc_aix,
@@ -498,31 +551,44 @@ implementation
                writer.AsmWrite('.section ');
            end
          else
-          writer.AsmWrite('.section ');
+           begin
+             writer.AsmWrite('.section ');
+             { sectionname may rename those sections, so we do not write flags/progbits for them,
+               the assembler will ignore them/spite out a warning anyways }
+             if not(atype in [sec_data,sec_rodata,sec_rodata_norel]) then
+               begin
+                 usesectionflags:=true;
+                 usesectionprogbits:=true;
+               end;
+           end
         end;
         s:=sectionname(atype,aname,aorder);
         writer.AsmWrite(s);
         { flags explicitly defined? }
-        if (secflags<>SF_None) or (secprogbits<>SPB_None) then
+        if (usesectionflags or usesectionprogbits) and
+           ((secflags<>[]) or
+            (secprogbits<>SPB_None)) then
           begin
-            case secflags of
-              SF_A:
-                writer.AsmWrite(',"a"');
-              SF_W:
-                writer.AsmWrite(',"w"');
-              SF_X:
-                writer.AsmWrite(',"x"');
-              SF_None:
-                writer.AsmWrite(',""');
-            end;
-            case secprogbits of
-              SPB_PROGBITS:
-                writer.AsmWrite(',%progbits');
-              SPB_NOBITS:
-                writer.AsmWrite(',%nobits');
-              SPB_None:
-                ;
-            end;
+            if usesectionflags then
+              begin
+                s:=',"'+sectionflags(secflags);
+                writer.AsmWrite(s+'"');
+              end;
+            if usesectionprogbits then
+              begin
+                case secprogbits of
+                  SPB_PROGBITS:
+                    writer.AsmWrite(',%progbits');
+                  SPB_NOBITS:
+                    writer.AsmWrite(',%nobits');
+                  SPB_NOTE:
+                    writer.AsmWrite(',%note');
+                  SPB_None:
+                    ;
+                  else
+                    InternalError(2019100801);
+                end;
+              end;
           end
         else
           case atype of
@@ -544,7 +610,7 @@ implementation
                   system_i386_darwin,
                   system_i386_iphonesim:
                     writer.AsmWriteln('__IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5');
-                  system_arm_darwin:
+                  system_arm_ios:
                     if (cs_create_pic in current_settings.moduleswitches) then
                       writer.AsmWriteln('__TEXT,__picsymbolstub4,symbol_stubs,none,16')
                     else
@@ -812,7 +878,7 @@ implementation
              begin
                if tai_section(hp).sectype<>sec_none then
                  if replaceforbidden then
-                   WriteSection(tai_section(hp).sectype,ReplaceForbiddenAsmSymbolChars(tai_section(hp).name^),tai_section(hp).secorder,
+                   WriteSection(tai_section(hp).sectype,ApplyAsmSymbolRestrictions(tai_section(hp).name^),tai_section(hp).secorder,
                      tai_section(hp).secalign,tai_section(hp).secflags,tai_section(hp).secprogbits)
                  else
                    WriteSection(tai_section(hp).sectype,tai_section(hp).name^,tai_section(hp).secorder,
@@ -864,8 +930,8 @@ implementation
                    if tai_datablock(hp).is_global then
                      begin
                        writer.AsmWrite(#9'.globl ');
-                       writer.AsmWriteln(ReplaceForbiddenAsmSymbolChars(tai_datablock(hp).sym.name));
-                       writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_datablock(hp).sym.name));
+                       writer.AsmWriteln(ApplyAsmSymbolRestrictions(tai_datablock(hp).sym.name));
+                       writer.AsmWrite(ApplyAsmSymbolRestrictions(tai_datablock(hp).sym.name));
                        writer.AsmWriteln(':');
                        writer.AsmWrite(#9'.space ');
                        writer.AsmWriteln(tostr(tai_datablock(hp).size));
@@ -875,7 +941,7 @@ implementation
                    else
                      begin
                        writer.AsmWrite(#9'.lcomm ');
-                       writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_datablock(hp).sym.name));
+                       writer.AsmWrite(ApplyAsmSymbolRestrictions(tai_datablock(hp).sym.name));
                        writer.AsmWrite(',');
                        writer.AsmWrite(tostr(tai_datablock(hp).size)+',');
                        writer.AsmWrite('_data.bss_,');
@@ -895,7 +961,7 @@ implementation
                          begin
                            writer.AsmWrite(#9'.comm'#9);
                            if replaceforbidden then
-                             writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_datablock(hp).sym.name))
+                             writer.AsmWrite(ApplyAsmSymbolRestrictions(tai_datablock(hp).sym.name))
                            else
                              writer.AsmWrite(tai_datablock(hp).sym.name);
                            writer.AsmWrite(','+tostr(tai_datablock(hp).size));
@@ -906,7 +972,7 @@ implementation
                          begin
                            writer.AsmWrite(#9'.lcomm'#9);
                            if replaceforbidden then
-                             writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_datablock(hp).sym.name));
+                             writer.AsmWrite(ApplyAsmSymbolRestrictions(tai_datablock(hp).sym.name));
                            else
                              writer.AsmWrite(tai_datablock(hp).sym.name);
                            writer.AsmWrite(','+tostr(tai_datablock(hp).size));
@@ -923,7 +989,7 @@ implementation
                              WriteHiddenSymbol(tai_datablock(hp).sym);
                            writer.AsmWrite(#9'.globl ');
                            if replaceforbidden then
-                             writer.AsmWriteln(ReplaceForbiddenAsmSymbolChars(Tai_datablock(hp).sym.name))
+                             writer.AsmWriteln(ApplyAsmSymbolRestrictions(Tai_datablock(hp).sym.name))
                            else
                              writer.AsmWriteln(Tai_datablock(hp).sym.name);
                          end;
@@ -934,10 +1000,10 @@ implementation
                        if replaceforbidden then
                          begin
                            if (tf_needs_symbol_type in target_info.flags) then
-                             writer.AsmWriteln(#9'.type '+ReplaceForbiddenAsmSymbolChars(Tai_datablock(hp).sym.name)+','+sepChar+'object');
+                             writer.AsmWriteln(#9'.type '+ApplyAsmSymbolRestrictions(Tai_datablock(hp).sym.name)+','+sepChar+'object');
                            if (tf_needs_symbol_size in target_info.flags) and (tai_datablock(hp).size > 0) then
-                              writer.AsmWriteln(#9'.size '+ReplaceForbiddenAsmSymbolChars(Tai_datablock(hp).sym.name)+','+tostr(Tai_datablock(hp).size));
-                           writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(Tai_datablock(hp).sym.name))
+                              writer.AsmWriteln(#9'.size '+ApplyAsmSymbolRestrictions(Tai_datablock(hp).sym.name)+','+tostr(Tai_datablock(hp).size));
+                           writer.AsmWrite(ApplyAsmSymbolRestrictions(Tai_datablock(hp).sym.name))
                          end
                        else
                          begin
@@ -992,7 +1058,39 @@ implementation
                      writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(gottpoff)+(.-'+tai_const(hp).endsym.name+tostr_with_plus(tai_const(hp).symofs)+')');
                      writer.Asmln;
                    end;
+                 aitconst_tlsgd:
+                   begin
+                     writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(tlsgd)+(.-'+tai_const(hp).endsym.name+tostr_with_plus(tai_const(hp).symofs)+')');
+                     writer.Asmln;
+                   end;
+                 aitconst_tlsdesc:
+                   begin
+                     writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(tlsdesc)+(.-'+tai_const(hp).endsym.name+tostr_with_plus(tai_const(hp).symofs)+')');
+                     writer.Asmln;
+                   end;
+                 aitconst_tpoff:
+                   begin
+                     if assigned(tai_const(hp).endsym) or (tai_const(hp).symofs<>0) then
+                       Internalerror(2019092805);
+                     writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(tpoff)');
+                     writer.Asmln;
+                   end;
 {$endif cpu64bitaddr}
+                 aitconst_dtpoff:
+                   begin
+{$ifdef arm}
+                     writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(tlsldo)');
+                     writer.Asmln;
+{$endif arm}
+{$ifdef x86_64}
+                     writer.AsmWrite(#9'.long'#9+tai_const(hp).sym.name+'@dtpoff');
+                     writer.Asmln;
+{$endif x86_64}
+{$ifdef i386}
+                     writer.AsmWrite(#9'.long'#9+tai_const(hp).sym.name+'@dtpoff');
+                     writer.Asmln;
+{$endif i386}
+                   end;
                  aitconst_got:
                    begin
                      if tai_const(hp).symofs<>0 then
@@ -1075,8 +1173,7 @@ implementation
                          if (constdef in ait_unaligned_consts) and
                             (target_info.system in use_ua_sparc_systems) then
                            writer.AsmWrite(ait_ua_sparc_const2str[constdef])
-                         else if (constdef in ait_unaligned_consts) and
-                                 (target_info.system in use_ua_elf_systems) then
+                         else if (target_info.system in use_ua_elf_systems) then
                            writer.AsmWrite(ait_ua_elf_const2str[constdef])
                          { we can also have unaligned pointers in packed record
                            constants, which don't get translated into
@@ -1105,7 +1202,7 @@ implementation
                                else
                                  s:=tai_const(hp).sym.name;
                                if replaceforbidden then
-                                 s:=ReplaceForbiddenAsmSymbolChars(s);
+                                 s:=ApplyAsmSymbolRestrictions(s);
                                if tai_const(hp).value<>0 then
                                  s:=s+tostr_with_plus(tai_const(hp).value);
                              end
@@ -1207,12 +1304,12 @@ implementation
 {$endif arm}
                      writer.AsmWrite('.globl'#9);
                      if replaceforbidden then
-                       writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_label(hp).labsym.name))
+                       writer.AsmWriteLn(ApplyAsmSymbolRestrictions(tai_label(hp).labsym.name))
                      else
                        writer.AsmWriteLn(tai_label(hp).labsym.name);
                    end;
                   if replaceforbidden then
-                    writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_label(hp).labsym.name))
+                    writer.AsmWrite(ApplyAsmSymbolRestrictions(tai_label(hp).labsym.name))
                   else
                     writer.AsmWrite(tai_label(hp).labsym.name);
                   writer.AsmWriteLn(':');
@@ -1230,7 +1327,7 @@ implementation
                 begin
                   writer.AsmWrite('.globl'#9);
                   if replaceforbidden then
-                    writer.AsmWriteln(ReplaceForbiddenAsmSymbolChars(tai_symbol(hp).sym.name))
+                    writer.AsmWriteln(ApplyAsmSymbolRestrictions(tai_symbol(hp).sym.name))
                   else
                     writer.AsmWriteln(tai_symbol(hp).sym.name);
                   if (tai_symbol(hp).sym.bind=AB_PRIVATE_EXTERN) then
@@ -1265,14 +1362,14 @@ implementation
                        s:=#9'.llong .';
                        ch:='3';
                      end;
-                   writer.AsmWriteLn(#9'.csect '+ReplaceForbiddenAsmSymbolChars(tai_symbol(hp).sym.name)+'[DS],'+ch);
-                   writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_symbol(hp).sym.name)+':');
-                   writer.AsmWriteln(s+ReplaceForbiddenAsmSymbolChars(tai_symbol(hp).sym.name)+', TOC[tc0], 0');
+                   writer.AsmWriteLn(#9'.csect '+ApplyAsmSymbolRestrictions(tai_symbol(hp).sym.name)+'[DS],'+ch);
+                   writer.AsmWriteLn(ApplyAsmSymbolRestrictions(tai_symbol(hp).sym.name)+':');
+                   writer.AsmWriteln(s+ApplyAsmSymbolRestrictions(tai_symbol(hp).sym.name)+', TOC[tc0], 0');
                    writer.AsmWriteln(#9'.csect .text[PR]');
                    if (tai_symbol(hp).is_global) then
-                     writer.AsmWriteLn('.globl .'+ReplaceForbiddenAsmSymbolChars(tai_symbol(hp).sym.name))
+                     writer.AsmWriteLn('.globl .'+ApplyAsmSymbolRestrictions(tai_symbol(hp).sym.name))
                    else
-                     writer.AsmWriteLn('.lglobl .'+ReplaceForbiddenAsmSymbolChars(tai_symbol(hp).sym.name));
+                     writer.AsmWriteLn('.lglobl .'+ApplyAsmSymbolRestrictions(tai_symbol(hp).sym.name));
                    { the dotted name is the name of the actual function entry }
                    writer.AsmWrite('.');
                  end
@@ -1293,9 +1390,9 @@ implementation
                  end;
                if replaceforbidden then
                  if not(tai_symbol(hp).has_value) then
-                   writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_symbol(hp).sym.name + ':'))
+                   writer.AsmWriteLn(ApplyAsmSymbolRestrictions(tai_symbol(hp).sym.name + ':'))
                  else
-                   writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_symbol(hp).sym.name + '=' + tostr(tai_symbol(hp).value)))
+                   writer.AsmWriteLn(ApplyAsmSymbolRestrictions(tai_symbol(hp).sym.name + '=' + tostr(tai_symbol(hp).value)))
                else if not(tai_symbol(hp).has_value) then
                  writer.AsmWriteLn(tai_symbol(hp).sym.name + ':')
                else
@@ -1315,13 +1412,13 @@ implementation
                if replaceforbidden then
                  begin
                    { avoid string truncation }
-                   writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_symbolpair(hp).sym^));
+                   writer.AsmWrite(ApplyAsmSymbolRestrictions(tai_symbolpair(hp).sym^));
                    writer.AsmWrite(s);
-                   writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_symbolpair(hp).value^));
+                   writer.AsmWriteLn(ApplyAsmSymbolRestrictions(tai_symbolpair(hp).value^));
                    if tai_symbolpair(hp).kind=spk_set_global then
                      begin
                        writer.AsmWrite(#9'.globl ');
-                       writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_symbolpair(hp).sym^));
+                       writer.AsmWriteLn(ApplyAsmSymbolRestrictions(tai_symbolpair(hp).sym^));
                      end;
                  end
                else
@@ -1350,7 +1447,7 @@ implementation
                      (tai_symbol_end(hp).sym.typ=AT_FUNCTION) then
                     writer.AsmWrite('.');
                   if replaceforbidden then
-                    writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_symbol_end(hp).sym.name))
+                    writer.AsmWrite(ApplyAsmSymbolRestrictions(tai_symbol_end(hp).sym.name))
                   else
                     writer.AsmWrite(tai_symbol_end(hp).sym.name);
                   writer.AsmWrite(', '+s+' - ');
@@ -1359,7 +1456,7 @@ implementation
                      (tai_symbol_end(hp).sym.typ=AT_FUNCTION) then
                     writer.AsmWrite('.');
                   if replaceforbidden then
-                    writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_symbol_end(hp).sym.name))
+                    writer.AsmWriteLn(ApplyAsmSymbolRestrictions(tai_symbol_end(hp).sym.name))
                   else
                     writer.AsmWriteLn(tai_symbol_end(hp).sym.name);
                 end;
@@ -1434,7 +1531,7 @@ implementation
                if tai_directive(hp).name <>'' then
                  begin
                    if replaceforbidden then
-                     writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_directive(hp).name))
+                     writer.AsmWrite(ApplyAsmSymbolRestrictions(tai_directive(hp).name))
                    else
                      writer.AsmWrite(tai_directive(hp).name);
                  end;
@@ -1481,6 +1578,18 @@ implementation
              begin
                WriteCFI(tai_cfi_base(hp));
              end;
+           ait_eabi_attribute:
+             begin
+               case tai_eabi_attribute(hp).eattr_typ of
+                 eattrtype_dword:
+                   writer.AsmWrite(#9'.eabi_attribute '+tostr(tai_eabi_attribute(hp).tag)+','+tostr(tai_eabi_attribute(hp).value));
+                 eattrtype_ntbs:
+                   writer.AsmWrite(#9'.eabi_attribute '+tostr(tai_eabi_attribute(hp).tag)+',"'+tai_eabi_attribute(hp).valuestr^+'"');
+                 else
+                   Internalerror(2019100601);
+               end;
+               writer.AsmLn;
+             end;
            else
              internalerror(2006012201);
          end;
@@ -1512,7 +1621,7 @@ implementation
         if asminfo^.dollarsign='$' then
           writer.AsmWriteLn(s.name)
         else
-          writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(s.name))
+          writer.AsmWriteLn(ApplyAsmSymbolRestrictions(s.name))
       end;
 
 
@@ -1521,7 +1630,7 @@ implementation
         { on Windows/(PE)COFF, global symbols are hidden by default: global
           symbols that are not explicitly exported from an executable/library,
           become hidden }
-        if target_info.system in systems_windows then
+        if (target_info.system in (systems_windows+systems_wince)) then
           exit;
         if target_info.system in systems_darwin then
           writer.AsmWrite(#9'.private_extern ')
@@ -1530,7 +1639,7 @@ implementation
         if asminfo^.dollarsign='$' then
           writer.AsmWriteLn(sym.name)
         else
-          writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(sym.name))
+          writer.AsmWriteLn(ApplyAsmSymbolRestrictions(sym.name))
       end;
 
 
@@ -1957,7 +2066,8 @@ implementation
          sec_none (* sec_objc_protlist *),
          sec_none (* sec_stack *),
          sec_none (* sec_heap *),
-         sec_none (* gcc_except_table *)
+         sec_none (* gcc_except_table *),
+         sec_none (* sec_arm_attribute *)
         );
       begin
         Result := inherited SectionName (SecXTable [AType], AName, AOrder);

+ 9 - 3
compiler/aopt.pas

@@ -79,6 +79,7 @@ Unit aopt;
 
     uses
       cutils,
+      cprofile,
       globtype, globals,
       verbose,
       cpubase,
@@ -147,6 +148,7 @@ Unit aopt;
           p := BlockStart;
           While (P <> BlockEnd) Do
             Begin
+              prefetch(pointer(p.Next)^);
               Case p.typ Of
                 ait_Label:
                   begin
@@ -190,7 +192,6 @@ Unit aopt;
                       End
                     else if tai_regalloc(p).ratype=ra_dealloc then
                       Begin
-                        ExcludeRegFromUsedRegs(tai_regalloc(p).Reg,Regs);
                         hp1 := p;
                         hp2 := nil;
                         While Not(assigned(FindRegAlloc(tai_regalloc(p).Reg, tai(hp1.Next)))) And
@@ -231,7 +232,9 @@ Unit aopt;
                             AsmL.remove(p);
                             p.free;
                             p := hp1;
-                          end;
+                          end
+                        else
+                          ExcludeRegFromUsedRegs(tai_regalloc(p).Reg,Regs);
                       End
                   End
                 else
@@ -345,6 +348,7 @@ Unit aopt;
         p:=BlockStart;
         while p<>BlockEnd Do
           begin
+            prefetch(pointer(p.Next)^);
             if SchedulerPass1Cpu(p) then
               continue;
             p:=tai(p.next);
@@ -387,12 +391,14 @@ Unit aopt;
       var
         p : TAsmOptimizer;
       begin
+        ResumeTimer(ct_aopt);
         p:=casmoptimizer.Create(AsmL);
         p.Optimize;
 {$ifdef DEBUG_INSTRUCTIONREGISTERDEPENDENCIES}
         p.Debug_InsertInstrRegisterDependencyInfo;
 {$endif DEBUG_INSTRUCTIONREGISTERDEPENDENCIES}
-        p.free
+        p.free;
+        StopTimer;
       end;
 
 

+ 62 - 27
compiler/aoptbase.pas

@@ -49,9 +49,9 @@ unit aoptbase;
         { returns true if register Reg is used by instruction p1 }
         Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;virtual;
         { returns true if register Reg occurs in operand op }
-        Function RegInOp(Reg: TRegister; const op: toper): Boolean;
+        class function RegInOp(Reg: TRegister; const op: toper): Boolean; static;
         { returns true if register Reg is used in the reference Ref }
-        Function RegInRef(Reg: TRegister; Const Ref: TReference): Boolean;
+        class function RegInRef(Reg: TRegister; Const Ref: TReference): Boolean; static;
 
         function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;virtual;
 
@@ -61,13 +61,13 @@ unit aoptbase;
         { gets the next tai object after current that contains info relevant }
         { to the optimizer in p1. If there is none, it returns false and     }
         { sets p1 to nil                                                     }
-        class Function GetNextInstruction(Current: tai; Var Next: tai): Boolean;
-        { gets the previous tai object after current that contains info  }
-        { relevant to the optimizer in last. If there is none, it retuns }
-        { false and sets last to nil                                     }
-        Function GetLastInstruction(Current: tai; Var Last: tai): Boolean;
+        class function GetNextInstruction(Current: tai; out Next: tai): Boolean; static;
+        { gets the previous tai object before current that contains info  }
+        { relevant to the optimizer in last. If there is none, it returns }
+        { false and sets last to nil                                      }
+        class function GetLastInstruction(Current: tai; out Last: tai): Boolean; static;
 
-        function SkipEntryExitMarker(current: tai; var next: tai): boolean;
+        class function SkipEntryExitMarker(current: tai; out next: tai): boolean; static;
 
         { processor dependent methods }
 
@@ -104,10 +104,13 @@ unit aoptbase;
 
         { compares reg1 and reg2 having the same type and being the same super registers
           so the register size is neglected }
-        function SuperRegistersEqual(reg1,reg2 : TRegister) : Boolean;
+        class function SuperRegistersEqual(reg1,reg2 : TRegister) : Boolean; static; {$ifdef USEINLINE}inline;{$endif}
+
+        { returns true if changing reg1 changes reg2 or vice versa }
+        class function RegistersInterfere(reg1,reg2 : TRegister) : Boolean; static; {$ifdef USEINLINE}inline;{$endif}
     end;
 
-    function labelCanBeSkipped(p: tai_label): boolean;
+    function labelCanBeSkipped(p: tai_label): boolean; {$ifdef USEINLINE}inline;{$endif}
 
   implementation
 
@@ -140,10 +143,10 @@ unit aoptbase;
     End;
 
 
-  Function TAOptBase.RegInOp(Reg: TRegister; const op: toper): Boolean;
+  class function TAOptBase.RegInOp(Reg: TRegister; const op: toper): Boolean;
     Begin
       Case op.typ Of
-        Top_Reg: RegInOp := SuperRegistersEqual(Reg,op.reg);
+        Top_Reg: RegInOp := RegistersInterfere(Reg,op.reg);
         Top_Ref: RegInOp := RegInRef(Reg, op.ref^);
         {$ifdef arm}
         Top_Shifterop: RegInOp := op.shifterop^.rs = Reg;
@@ -154,18 +157,18 @@ unit aoptbase;
     End;
 
 
-  Function TAOptBase.RegInRef(Reg: TRegister; Const Ref: TReference): Boolean;
+  class function TAOptBase.RegInRef(Reg: TRegister; Const Ref: TReference): Boolean;
   Begin
-    RegInRef := SuperRegistersEqual(Ref.Base,Reg)
+    RegInRef := RegistersInterfere(Ref.Base,Reg)
 {$ifdef cpurefshaveindexreg}
-    Or SuperRegistersEqual(Ref.Index,Reg)
+    Or RegistersInterfere(Ref.Index,Reg)
 {$endif cpurefshaveindexreg}
 {$ifdef x86}
     or (Reg=Ref.segment)
     { if Ref.segment isn't set, the cpu uses implicitly ss or ds, depending on the base register }
     or ((Ref.segment=NR_NO) and (
-      ((Reg=NR_SS) and (SuperRegistersEqual(Ref.base,NR_EBP) or SuperRegistersEqual(Ref.base,NR_ESP))) or
-      ((Reg=NR_DS) and not(SuperRegistersEqual(Ref.base,NR_EBP) or SuperRegistersEqual(Ref.base,NR_ESP)))
+      ((Reg=NR_SS) and (RegistersInterfere(Ref.base,NR_EBP) or RegistersInterfere(Ref.base,NR_ESP))) or
+      ((Reg=NR_DS) and not(RegistersInterfere(Ref.base,NR_EBP) or RegistersInterfere(Ref.base,NR_ESP)))
     ))
 {$endif x86}
   End;
@@ -176,13 +179,13 @@ unit aoptbase;
   End;
 
 
-  function labelCanBeSkipped(p: tai_label): boolean; inline;
+  function labelCanBeSkipped(p: tai_label): boolean; {$ifdef USEINLINE}inline;{$endif}
   begin
     labelCanBeSkipped := not(p.labsym.is_used) or (p.labsym.labeltype<>alt_jump);
   end;
 
 
-  class Function TAOptBase.GetNextInstruction(Current: tai; Var Next: tai): Boolean;
+  class function TAOptBase.GetNextInstruction(Current: tai; out Next: tai): Boolean;
   Begin
     Repeat
       Current := tai(Current.Next);
@@ -195,7 +198,12 @@ unit aoptbase;
 {$endif cpudelayslot}
              ((Current.typ = ait_label) And
               labelCanBeSkipped(Tai_Label(Current)))) Do
-        Current := tai(Current.Next);
+        begin
+          { this won't help the current loop, but it helps when returning from GetNextInstruction
+            as the next entry is probably already in the cache }
+          prefetch(pointer(Current.Next)^);
+          Current := Tai(Current.Next);
+        end;
       If Assigned(Current) And
          (Current.typ = ait_Marker) And
          (Tai_Marker(Current).Kind = mark_NoPropInfoStart) Then
@@ -203,7 +211,12 @@ unit aoptbase;
           While Assigned(Current) And
                 ((Current.typ <> ait_Marker) Or
                  (Tai_Marker(Current).Kind <> mark_NoPropInfoEnd)) Do
-            Current := Tai(Current.Next);
+            begin
+              { this won't help the current loop, but it helps when returning from GetNextInstruction
+                as the next entry is probably already in the cache }
+              prefetch(pointer(Current.Next)^);
+              Current := Tai(Current.Next);
+            end;
         End;
     Until Not(Assigned(Current)) Or
           (Current.typ <> ait_Marker) Or
@@ -221,7 +234,7 @@ unit aoptbase;
         End;
   End;
 
-  Function TAOptBase.GetLastInstruction(Current: tai; Var Last: tai): Boolean;
+  class function TAOptBase.GetLastInstruction(Current: tai; out Last: tai): Boolean;
   Begin
     Repeat
       Current := Tai(Current.previous);
@@ -263,12 +276,12 @@ unit aoptbase;
   End;
 
 
-  function TAOptBase.SkipEntryExitMarker(current: tai; var next: tai): boolean;
+  class function TAOptBase.SkipEntryExitMarker(current: tai; out next: tai): boolean;
     begin
       result:=true;
+      next:=current;
       if current.typ<>ait_marker then
         exit;
-      next:=current;
       while GetNextInstruction(next,next) do
         begin
           if (next.typ<>ait_marker) or not(tai_marker(next).Kind in [mark_Position,mark_BlockStart]) then
@@ -316,12 +329,34 @@ unit aoptbase;
     end;
 
 
-  function TAOptBase.SuperRegistersEqual(reg1,reg2 : TRegister) : Boolean;
+  class function TAOptBase.SuperRegistersEqual(reg1,reg2 : TRegister) : Boolean;{$ifdef USEINLINE}inline;{$endif}
   Begin
-    Result:=(getregtype(reg1) = getregtype(reg2)) and
-            (getsupreg(reg1) = getsupreg(Reg2));
+    { Do an optimized version of
+
+      Result:=(getregtype(reg1) = getregtype(reg2)) and
+      (getsupreg(reg1) = getsupreg(Reg2));
+
+      as SuperRegistersEqual is used a lot
+
+      The non-Z80 path does this with a single masked compare: $ff00ffff
+      drops bits 16..23 of the register value and keeps all other bits.
+      NOTE(review): presumably bits 16..23 are the subregister field that
+      getregtype/getsupreg ignore - confirm against the TRegister layout.
+    }
+{$ifdef Z80}
+    { Z80 registers are indexed in an incompatible way (without R_SUBH), so it
+      needs a special check. }
+    Result:=super_registers_equal(reg1,reg2);
+{$else Z80}
+    Result:=(DWord(reg1) and $ff00ffff)=(DWord(reg2) and $ff00ffff);
+{$endif Z80}
   end;
 
+
+  { Returns true if changing reg1 changes reg2 or vice versa.
+    On every target except Z80 this is exactly SuperRegistersEqual; Z80 builds
+    delegate to registers_interfere instead. }
+  class function TAOptBase.RegistersInterfere(reg1,reg2 : TRegister) : Boolean; static; {$ifdef USEINLINE}inline;{$endif}
+    begin
+{$ifdef Z80}
+      result:=registers_interfere(reg1,reg2);
+{$else Z80}
+      result:=SuperRegistersEqual(reg1,reg2);
+{$endif Z80}
+    end;
+
   { ******************* Processor dependent stuff *************************** }
 
   Function TAOptBase.RegMaxSize(Reg: TRegister): TRegister;

Різницю між файлами не показано, бо вона завелика
+ 1018 - 121
compiler/aoptobj.pas


+ 2 - 2
compiler/aoptutils.pas

@@ -36,7 +36,7 @@ unit aoptutils;
 {$endif max_operands>2}
 
     { skips all labels and returns the next "real" instruction }
-    function SkipLabels(hp: tai; var hp2: tai): boolean;
+    function SkipLabels(hp: tai; out hp2: tai): boolean;
 
     { sets hp2 to hp and returns True if hp is not nil }
     function SetAndTest(const hp: tai; out hp2: tai): Boolean;
@@ -68,7 +68,7 @@ unit aoptutils;
 
 
     { skips all labels and returns the next "real" instruction }
-    function SkipLabels(hp: tai; var hp2: tai): boolean;
+    function SkipLabels(hp: tai; out hp2: tai): boolean;
       begin
         while assigned(hp.next) and
               (tai(hp.next).typ in SkipInstr + [ait_label,ait_align]) Do

+ 24 - 7
compiler/arm/aasmcpu.pas

@@ -866,6 +866,7 @@ implementation
             A_NEG,
             A_VABS,A_VADD,A_VCVT,A_VDIV,A_VLDR,A_VMOV,A_VMUL,A_VNEG,A_VSQRT,A_VSUB,
             A_VEOR,
+            A_VMRS,A_VMSR,
             A_MRS,A_MSR:
               if opnr=0 then
                 result:=operand_write
@@ -901,7 +902,9 @@ implementation
                 result := operand_read;
             //Thumb2
             A_LSL, A_LSR, A_ROR, A_ASR, A_SDIV, A_UDIV, A_MOVW, A_MOVT, A_MLS, A_BFI,
-            A_SMMLA,A_SMMLS:
+            A_QADD,
+            A_PKHTB,A_PKHBT,
+            A_SMMLA,A_SMMLS,A_SMUAD,A_SMUSD:
               if opnr in [0] then
                 result:=operand_write
               else
@@ -920,7 +923,10 @@ implementation
             A_STREX:
               result:=operand_write;
             else
-              internalerror(200403151);
+              begin
+                writeln(opcode);
+                internalerror(200403151);
+              end;
           end;
       end;
 
@@ -1160,8 +1166,8 @@ implementation
                                             begin
                                               if (hp2.typ=ait_const) and (tai_const(hp2).sym=tai_const(hp).sym)
                                                 and (tai_const(hp2).value=tai_const(hp).value) and (tai(hp2.previous).typ=ait_label) and
-                                                { gottpoff symbols are PC relative, so we cannot reuse them }
-                                                (tai_const(hp2).consttype<>aitconst_gottpoff) then
+                                                { gottpoff and tlsgd symbols are PC relative, so we cannot reuse them }
+                                                (not(tai_const(hp2).consttype in [aitconst_gottpoff,aitconst_tlsgd,aitconst_tlsdesc])) then
                                                 begin
                                                   with taicpu(curtai).oper[curop]^.ref^ do
                                                     begin
@@ -2228,6 +2234,7 @@ implementation
             { fpu_vfpv3_d16  } IF_VFPv2 or IF_VFPv3,
             { fpu_fpv4_s16   } IF_NONE,
             { fpu_vfpv4      } IF_VFPv2 or IF_VFPv3 or IF_VFPv4,
+            { fpu_vfpv4      } IF_VFPv2 or IF_VFPv3 or IF_VFPv4,
             { fpu_neon_vfpv4 } IF_VFPv2 or IF_VFPv3 or IF_VFPv4 or IF_NEON
           );
       begin
@@ -3051,13 +3058,23 @@ implementation
                 begin
                   currsym:=objdata.symbolref(oper[0]^.ref^.symbol);
 
-                  bytes:=bytes or (((oper[0]^.ref^.offset-8) shr 2) and $ffffff);
+                  { tlscall is not relative so ignore the offset }
+                  if oper[0]^.ref^.refaddr<>addr_tlscall then
+                    bytes:=bytes or (((oper[0]^.ref^.offset-8) shr 2) and $ffffff);
 
                   if (opcode<>A_BL) or (condition<>C_None) then
                     objdata.writereloc(aint(bytes),4,currsym,RELOC_RELATIVE_24)
                   else
-                    objdata.writereloc(aint(bytes),4,currsym,RELOC_RELATIVE_CALL);
-
+                    case oper[0]^.ref^.refaddr of
+                      addr_pic:
+                        objdata.writereloc(aint(bytes),4,currsym,RELOC_ARM_CALL);
+                      addr_full:
+                        objdata.writereloc(aint(bytes),4,currsym,RELOC_RELATIVE_CALL);
+                      addr_tlscall:
+                        objdata.writereloc(aint(bytes),4,currsym,RELOC_TLS_CALL);
+                      else
+                        Internalerror(2019092903);
+                    end;
                   exit;
                 end;
             end;

+ 32 - 7
compiler/arm/agarmgas.pas

@@ -49,6 +49,7 @@ unit agarmgas;
 
       TArmAppleGNUAssembler=class(TAppleGNUassembler)
         constructor CreateWithWriter(info: pasminfo; wr: TExternalAssemblerOutputFile; freewriter, smart: boolean); override;
+        function MakeCmdLine: TCmdStr; override;
         procedure WriteExtraHeader; override;
       end;
 
@@ -107,6 +108,12 @@ unit agarmgas;
         case current_settings.fputype of
           fpu_soft:
             result:='-mfpu=softvfp '+result;
+          fpu_fpa:
+            result:='-mfpu=fpa '+result;
+          fpu_fpa10:
+            result:='-mfpu=fpa10 '+result;
+          fpu_fpa11:
+            result:='-mfpu=fpa11 '+result;
           fpu_vfpv2:
             result:='-mfpu=vfpv2 '+result;
           fpu_vfpv3:
@@ -115,6 +122,7 @@ unit agarmgas;
             result:='-mfpu=neon-vfpv3 '+result;
           fpu_vfpv3_d16:
             result:='-mfpu=vfpv3-d16 '+result;
+          fpu_fpv4_sp_d16,
           fpu_fpv4_s16:
             result:='-mfpu=fpv4-sp-d16 '+result;
           fpu_vfpv4:
@@ -160,6 +168,18 @@ unit agarmgas;
       end;
 
 
+    { Builds the assembler command line on top of the inherited one.
+      Only when the selected assembler id is as_clang_gas or as_clang_asdarwin,
+      two options are prepended: first '-m<fpu>' taken from fputypestrllvm for
+      the current FPU type (skipped when that entry is empty), then
+      '-mfloat-abi=softfp', which therefore ends up leftmost. }
+    function TArmAppleGNUAssembler.MakeCmdLine: TCmdStr;
+      begin
+        result:=inherited MakeCmdLine;
+        if (asminfo^.id in [as_clang_gas,as_clang_asdarwin]) then
+          begin
+            if fputypestrllvm[current_settings.fputype] <> '' then
+              result:='-m'+fputypestrllvm[current_settings.fputype]+' '+result;
+            { Apple arm always uses softfp floating point ABI }
+            result:='-mfloat-abi=softfp '+result;
+          end;
+      end;
+
     procedure TArmAppleGNUAssembler.WriteExtraHeader;
       begin
         inherited WriteExtraHeader;
@@ -194,7 +214,9 @@ unit agarmgas;
                 if offset<>0 then
                   s:=s+tostr_with_plus(offset);
                 if refaddr=addr_pic then
-                  s:=s+'(PLT)';
+                  s:=s+'(PLT)'
+                else if refaddr=addr_tlscall then
+                  s:=s+'(tlscall)';
               end
             else
               begin
@@ -426,9 +448,10 @@ unit agarmgas;
             asmbin : 'as';
             asmcmd : '-o $OBJ $EXTRAOPT $ASM';
             supported_targets : [system_arm_linux,system_arm_netbsd,system_arm_wince,system_arm_gba,system_arm_palmos,system_arm_nds,
-                                 system_arm_embedded,system_arm_symbian,system_arm_android,system_arm_aros];
+                                 system_arm_embedded,system_arm_symbian,system_arm_android,system_arm_aros,system_arm_freertos];
             flags : [af_needar,af_smartlink_sections];
             labelprefix : '.L';
+            labelmaxlen : -1;
             comment : '# ';
             dollarsign: '$';
           );
@@ -439,9 +462,10 @@ unit agarmgas;
             idtxt  : 'AS-DARWIN';
             asmbin : 'as';
             asmcmd : '-o $OBJ $EXTRAOPT $ASM -arch $ARCH';
-            supported_targets : [system_arm_darwin];
+            supported_targets : [system_arm_ios];
             flags : [af_needar,af_smartlink_sections,af_supports_dwarf,af_stabs_use_function_absolute_addresses];
             labelprefix : 'L';
+            labelmaxlen : -1;
             comment : '# ';
             dollarsign: '$';
           );
@@ -449,13 +473,14 @@ unit agarmgas;
 
        as_arm_clang_darwin_info : tasminfo =
           (
-            id     : as_clang;
+            id     : as_clang_asdarwin;
             idtxt  : 'CLANG';
             asmbin : 'clang';
-            asmcmd : '-c -o $OBJ $EXTRAOPT -arch $ARCH $DARWINVERSION -x assembler $ASM';
-            supported_targets : [system_arm_darwin];
-            flags : [af_needar,af_smartlink_sections,af_supports_dwarf];
+            asmcmd : '-x assembler -c -target $TRIPLET -o $OBJ $EXTRAOPT -x assembler $ASM';
+            supported_targets : [system_arm_ios];
+            flags : [af_needar,af_smartlink_sections,af_supports_dwarf,af_llvm];
             labelprefix : 'L';
+            labelmaxlen : -1;
             comment : '# ';
             dollarsign: '$';
           );

+ 103 - 589
compiler/arm/aoptcpu.pas

@@ -26,19 +26,24 @@ Unit aoptcpu;
 {$i fpcdefs.inc}
 
 { $define DEBUG_PREREGSCHEDULER}
-{$define DEBUG_AOPTCPU}
+{ $define DEBUG_AOPTCPU}
 
 Interface
 
-uses cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
+uses
+  cgbase, cgutils, cpubase, aasmtai,
+  aasmcpu,
+  aopt, aoptobj, aoptarm;
 
 Type
-  TCpuAsmOptimizer = class(TAsmOptimizer)
+  TCpuAsmOptimizer = class(TARMAsmOptimizer)
+    { Can't be done in some cases due to the limited range of jumps }
+    function CanDoJumpOpts: Boolean; override;
+
     { uses the same constructor as TAopObj }
     function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
     procedure PeepHoleOptPass2;override;
     Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
-    function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
     function RemoveSuperfluousVMov(const p : tai; movp : tai; const optimizer : string) : boolean;
 
     { gets the next tai object after current that contains info relevant
@@ -46,7 +51,6 @@ Type
       change in program flow.
       If there is none, it returns false and
       sets p1 to nil                                                     }
-    Function GetNextInstructionUsingReg(Current: tai; Out Next: tai; reg: TRegister): Boolean;
     Function GetNextInstructionUsingRef(Current: tai; Out Next: tai; const ref: TReference; StopOnStore: Boolean = true): Boolean;
 
     { outputs a debug message into the assembler file }
@@ -106,63 +110,6 @@ Implementation
     end;
 
 
-  function RefsEqual(const r1, r2: treference): boolean;
-    begin
-      refsequal :=
-        (r1.offset = r2.offset) and
-        (r1.base = r2.base) and
-        (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
-        (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
-        (r1.relsymbol = r2.relsymbol) and
-        (r1.signindex = r2.signindex) and
-        (r1.shiftimm = r2.shiftimm) and
-        (r1.addressmode = r2.addressmode) and
-        (r1.shiftmode = r2.shiftmode) and
-        (r1.volatility=[]) and
-        (r2.volatility=[]);
-    end;
-
-  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
-  begin
-    result :=
-      (instr.typ = ait_instruction) and
-      ((op = []) or ((ord(taicpu(instr).opcode)<256) and (taicpu(instr).opcode in op))) and
-      ((cond = []) or (taicpu(instr).condition in cond)) and
-      ((postfix = []) or (taicpu(instr).oppostfix in postfix));
-  end;
-
-  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
-  begin
-    result :=
-      (instr.typ = ait_instruction) and
-      (taicpu(instr).opcode = op) and
-      ((cond = []) or (taicpu(instr).condition in cond)) and
-      ((postfix = []) or (taicpu(instr).oppostfix in postfix));
-  end;
-
-  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
-    begin
-      result := oper1.typ = oper2.typ;
-
-      if result then
-        case oper1.typ of
-          top_const:
-            Result:=oper1.val = oper2.val;
-          top_reg:
-            Result:=oper1.reg = oper2.reg;
-          top_conditioncode:
-            Result:=oper1.cc = oper2.cc;
-          top_ref:
-            Result:=RefsEqual(oper1.ref^, oper2.ref^);
-          else Result:=false;
-        end
-    end;
-
-  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
-    begin
-      result := (oper.typ = top_reg) and (oper.reg = reg);
-    end;
-
   function RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList):Boolean;
     begin
       Result:=false;
@@ -224,7 +171,7 @@ Implementation
       if (p.opcode in [A_STR, A_LDM, A_STM, A_PLD,
                           A_CMP, A_CMN, A_TST, A_TEQ,
                           A_B, A_BL, A_BX, A_BLX,
-                          A_SMLAL, A_UMLAL]) then i:=0;
+                          A_SMLAL, A_UMLAL, A_VSTM, A_VLDM]) then i:=0;
 
       while(i<p.ops) do
         begin
@@ -244,7 +191,11 @@ Implementation
             else
               ;
           end;
-          if instructionLoadsFromReg then exit; {Bailout if we found something}
+          if (i=0) and (p.opcode in [A_LDM,A_VLDM]) then
+            exit;
+
+          if instructionLoadsFromReg then
+            exit; {Bailout if we found something}
           Inc(I);
         end;
     end;
@@ -326,20 +277,6 @@ Implementation
     end;
 
 
-  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
-    Out Next: tai; reg: TRegister): Boolean;
-    begin
-      Next:=Current;
-      repeat
-        Result:=GetNextInstruction(Next,Next);
-      until not (Result) or
-            not(cs_opt_level3 in current_settings.optimizerswitches) or
-            (Next.typ<>ait_instruction) or
-            RegInInstruction(reg,Next) or
-            is_calljmp(taicpu(Next).opcode) or
-            RegModifiedByInstruction(NR_PC,Next);
-    end;
-
   function TCpuAsmOptimizer.GetNextInstructionUsingRef(Current: tai;
     Out Next: tai; const ref: TReference; StopOnStore: Boolean = true): Boolean;
     begin
@@ -379,89 +316,13 @@ Implementation
     end;
 {$endif DEBUG_AOPTCPU}
 
-  function TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
-    var
-      alloc,
-      dealloc : tai_regalloc;
-      hp1 : tai;
-    begin
-      Result:=false;
-      if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
-         (taicpu(movp).ops=2) and {We can't optimize if there is a shiftop}
-         MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
-         { don't mess with moves to pc }
-         (taicpu(movp).oper[0]^.reg<>NR_PC) and
-         { don't mess with moves to lr }
-         (taicpu(movp).oper[0]^.reg<>NR_R14) and
-         { the destination register of the mov might not be used beween p and movp }
-         not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
-         { cb[n]z are thumb instructions which require specific registers, with no wide forms }
-         (taicpu(p).opcode<>A_CBZ) and
-         (taicpu(p).opcode<>A_CBNZ) and
-         {There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same}
-         not (
-           (taicpu(p).opcode in [A_MLA, A_MUL]) and
-           (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and
-           (current_settings.cputype < cpu_armv6)
-         ) and
-         { Take care to only do this for instructions which REALLY load to the first register.
-           Otherwise
-             str reg0, [reg1]
-             mov reg2, reg0
-           will be optimized to
-             str reg2, [reg1]
-         }
-         regLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
-        begin
-          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
-          if assigned(dealloc) then
-            begin
-              DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
-              result:=true;
-
-              { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
-                and remove it if possible }
-              asml.Remove(dealloc);
-              alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
-              if assigned(alloc) then
-                begin
-                  asml.Remove(alloc);
-                  alloc.free;
-                  dealloc.free;
-                end
-              else
-                asml.InsertAfter(dealloc,p);
-
-              { try to move the allocation of the target register }
-              GetLastInstruction(movp,hp1);
-              alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
-              if assigned(alloc) then
-                begin
-                  asml.Remove(alloc);
-                  asml.InsertBefore(alloc,p);
-                  { adjust used regs }
-                  IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
-                end;
 
-              { finally get rid of the mov }
-              taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
-              { Remove preindexing and postindexing for LDR in some cases.
-                For example:
-                  ldr	reg2,[reg1, xxx]!
-                  mov reg1,reg2
-                must be translated to:
-                  ldr	reg1,[reg1, xxx]
-
-                Preindexing must be removed there, since the same register is used as the base and as the target.
-                Such case is not allowed for ARM CPU and produces crash. }
-              if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref)
-                and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base)
-              then
-                taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;
-              asml.remove(movp);
-              movp.free;
-            end;
-        end;
+  function TCpuAsmOptimizer.CanDoJumpOpts: Boolean;
+    begin
+      { Jump optimisations are refused only for 16-bit-only Thumb code, i.e. when
+        the current instruction set is is_thumb and the selected CPU type lacks
+        the CPUARM_HAS_THUMB2 capability; in every other case the result is True }
+      Result := not (
+        (current_settings.instructionset = is_thumb) and not (CPUARM_HAS_THUMB2 in cpu_capabilities[current_settings.cputype])
+      );
     end;
 
 
@@ -1227,9 +1088,52 @@ Implementation
                       ....
                     }
                     if (taicpu(p).ops = 2) and
-                       GetNextInstruction(p,hp1) and
+                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                        (tai(hp1).typ = ait_instruction) then
                       begin
+                        {
+                          This removes the mul from
+                          mov rX,0
+                          ...
+                          mul ...,rX,...
+                        }
+                        if false and (taicpu(p).oper[1]^.typ = top_const) and
+                          (taicpu(p).oper[1]^.val=0) and
+                          MatchInstruction(hp1, [A_MUL,A_MLA], [taicpu(p).condition], [taicpu(p).oppostfix]) and
+                          (((taicpu(hp1).oper[1]^.typ=top_reg) and MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^)) or
+                           ((taicpu(hp1).oper[2]^.typ=top_reg) and MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[2]^))) then
+                            begin
+                              TransferUsedRegs(TmpUsedRegs);
+                              UpdateUsedRegs(TmpUsedRegs, tai(p.next));
+                              UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
+                              DebugMsg('Peephole MovMUL/MLA2Mov0 done', p);
+                              if taicpu(hp1).opcode=A_MUL then
+                                taicpu(hp1).loadconst(1,0)
+                              else
+                                taicpu(hp1).loadreg(1,taicpu(hp1).oper[3]^.reg);
+                              taicpu(hp1).ops:=2;
+                              taicpu(hp1).opcode:=A_MOV;
+                              if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
+                                RemoveCurrentP(p);
+                              Result:=true;
+                              exit;
+                            end
+                        else if (taicpu(p).oper[1]^.typ = top_const) and
+                          (taicpu(p).oper[1]^.val=0) and
+                          MatchInstruction(hp1, A_MLA, [taicpu(p).condition], [taicpu(p).oppostfix]) and
+                          MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[3]^) then
+                            begin
+                              TransferUsedRegs(TmpUsedRegs);
+                              UpdateUsedRegs(TmpUsedRegs, tai(p.next));
+                              UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
+                              DebugMsg('Peephole MovMLA2MUL 1 done', p);
+                              taicpu(hp1).ops:=3;
+                              taicpu(hp1).opcode:=A_MUL;
+                              if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
+                                RemoveCurrentP(p);
+                              Result:=true;
+                              exit;
+                            end
                         {
                           This changes the very common
                           mov r0, #0
@@ -1239,7 +1143,7 @@ Implementation
 
                           and removes all superfluous mov instructions
                         }
-                        if (taicpu(p).oper[1]^.typ = top_const) and
+                        else if (taicpu(p).oper[1]^.typ = top_const) and
                            (taicpu(hp1).opcode=A_STR) then
                           while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and
                                 MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
@@ -1277,60 +1181,14 @@ Implementation
                               if not assigned(hp1) then
                                 break;
                             end;
+                         if RedundantMovProcess(p,hp1) then
+                           begin
+                             Result:=true;
+                             { p might not point at a mov anymore }
+                             exit;
+                           end;
                       end;
-                    {
-                      change
-                      mov r1, r0
-                      add r1, r1, #1
-                      to
-                      add r1, r0, #1
 
-                      Todo: Make it work for mov+cmp too
-
-                      CAUTION! If this one is successful p might not be a mov instruction anymore!
-                    }
-                    if (taicpu(p).ops = 2) and
-                       (taicpu(p).oper[1]^.typ = top_reg) and
-                       (taicpu(p).oppostfix = PF_NONE) and
-                       GetNextInstruction(p, hp1) and
-                       MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
-                                              A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
-                                        [taicpu(p).condition], []) and
-                       {MOV and MVN might only have 2 ops}
-                       (taicpu(hp1).ops >= 2) and
-                       MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
-                       (taicpu(hp1).oper[1]^.typ = top_reg) and
-                       (
-                         (taicpu(hp1).ops = 2) or
-                         (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
-                       ) then
-                      begin
-                      { When we get here we still don't know if the registers match}
-                        for I:=1 to 2 do
-                          {
-                            If the first loop was successful p will be replaced with hp1.
-                            The checks will still be ok, because all required information
-                            will also be in hp1 then.
-                          }
-                          if (taicpu(hp1).ops > I) and
-                             MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) and
-                             { prevent certain combinations on thumb(2), this is only a safe approximation }
-                             (not(GenerateThumbCode or GenerateThumb2Code) or
-                              ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
-                               (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15))
-                             ) then
-                            begin
-                              DebugMsg('Peephole RedundantMovProcess done', hp1);
-                              taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
-                              if p<>hp1 then
-                              begin
-                                asml.remove(p);
-                                p.free;
-                                p:=hp1;
-                                Result:=true;
-                              end;
-                            end;
-                      end;
                     { Fold the very common sequence
                         mov  regA, regB
                         ldr* regA, [regA]
@@ -1473,6 +1331,9 @@ Implementation
                                   hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
                                        taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
                                        taicpu(p).oper[2]^.shifterop^);
+                              if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then
+                                AllocRegBetween(taicpu(p).oper[2]^.shifterop^.rs,p,hp1,UsedRegs);
+                              AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
                               asml.insertbefore(hp2, hp1);
                               GetNextInstruction(p, hp2);
                               asml.remove(p);
@@ -1577,183 +1438,20 @@ Implementation
                 A_RSC,
                 A_SUB,
                 A_SBC,
-                A_AND,
                 A_BIC,
                 A_EOR,
                 A_ORR,
                 A_MLA,
                 A_MLS,
-                A_MUL:
+                A_MUL,
+                A_QADD,A_QADD16,A_QADD8,
+                A_QSUB,A_QSUB16,A_QSUB8,
+                A_QDADD,A_QDSUB,A_QASX,A_QSAX,
+                A_SHADD16,A_SHADD8,A_UHADD16,A_UHADD8,
+                A_SHSUB16,A_SHSUB8,A_UHSUB16,A_UHSUB8,
+                A_PKHTB,A_PKHBT,
+                A_SMUAD,A_SMUSD:
                   begin
-                        {
-                          optimize
-                          and reg2,reg1,const1
-                          ...
-                        }
-                    if (taicpu(p).opcode = A_AND) and
-                       (taicpu(p).ops>2) and
-                       (taicpu(p).oper[1]^.typ = top_reg) and
-                       (taicpu(p).oper[2]^.typ = top_const) then
-                      begin
-                        {
-                          change
-                          and reg2,reg1,const1
-                          ...
-                          and reg3,reg2,const2
-                          to
-                          and reg3,reg1,(const1 and const2)
-                        }
-                        if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
-                        MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
-                        RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
-                        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
-                        (taicpu(hp1).oper[2]^.typ = top_const) then
-                          begin
-                            if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
-                              begin
-                                DebugMsg('Peephole AndAnd2And done', p);
-                                taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
-                                taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
-                                taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
-                                asml.remove(hp1);
-                                hp1.free;
-                                Result:=true;
-                              end
-                            else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
-                              begin
-                                DebugMsg('Peephole AndAnd2And done', hp1);
-                                taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
-                                taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
-                                taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
-                                GetNextInstruction(p, hp1);
-                                asml.remove(p);
-                                p.free;
-                                p:=hp1;
-                                Result:=true;
-                              end;
-                          end
-                        {
-                          change
-                          and reg2,reg1,$xxxxxxFF
-                          strb reg2,[...]
-                          dealloc reg2
-                          to
-                          strb reg1,[...]
-                        }
-                        else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
-                          MatchInstruction(p, A_AND, [C_None], [PF_None]) and
-                          GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
-                          MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
-                          assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
-                          { the reference in strb might not use reg2 }
-                          not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
-                          { reg1 might not be modified inbetween }
-                          not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
-                          begin
-                            DebugMsg('Peephole AndStrb2Strb done', p);
-                            taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
-                            GetNextInstruction(p, hp1);
-                            asml.remove(p);
-                            p.free;
-                            p:=hp1;
-                            result:=true;
-                          end
-                        {
-                          change
-                          and reg2,reg1,255
-                          uxtb/uxth reg3,reg2
-                          dealloc reg2
-                          to
-                          and reg3,reg1,x
-                        }
-                        else if (taicpu(p).oper[2]^.val = $FF) and
-                          MatchInstruction(p, A_AND, [C_None], [PF_None]) and
-                          GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
-                          MatchInstruction(hp1, [A_UXTB,A_UXTH], [C_None], [PF_None]) and
-                          (taicpu(hp1).ops = 2) and
-                          RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
-                          MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
-                          { reg1 might not be modified inbetween }
-                          not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
-                          begin
-                            DebugMsg('Peephole AndUxt2And done', p);
-                            taicpu(hp1).opcode:=A_AND;
-                            taicpu(hp1).ops:=3;
-                            taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
-                            taicpu(hp1).loadconst(2,255);
-                            GetNextInstruction(p,hp1);
-                            asml.remove(p);
-                            p.Free;
-                            p:=hp1;
-                            result:=true;
-                          end
-                        {
-                          from
-                          and reg1,reg0,2^n-1
-                          mov reg2,reg1, lsl imm1
-                          (mov reg3,reg2, lsr/asr imm1)
-                          remove either the and or the lsl/xsr sequence if possible
-                        }
-
-                        else if cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
-                          GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
-                          MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
-                          (taicpu(hp1).ops=3) and
-                          MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
-                          (taicpu(hp1).oper[2]^.typ = top_shifterop) and
-                          (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
-                          (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
-                          RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
-                          begin
-                            {
-                              and reg1,reg0,2^n-1
-                              mov reg2,reg1, lsl imm1
-                              mov reg3,reg2, lsr/asr imm1
-                              =>
-                              and reg1,reg0,2^n-1
-                              if lsr and 2^n-1>=imm1 or asr and 2^n-1>imm1
-                            }
-                            if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
-                              MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
-                              (taicpu(hp2).ops=3) and
-                              MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
-                              (taicpu(hp2).oper[2]^.typ = top_shifterop) and
-                              (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
-                              (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
-                              (taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
-                              RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
-                              ((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
-                              ((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
-                               (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
-                              begin
-                                DebugMsg('Peephole AndLslXsr2And done', p);
-                                taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
-                                asml.Remove(hp1);
-                                asml.Remove(hp2);
-                                hp1.free;
-                                hp2.free;
-                                result:=true;
-                              end
-                            {
-                              and reg1,reg0,2^n-1
-                              mov reg2,reg1, lsl imm1
-                              =>
-                              mov reg2,reg0, lsl imm1
-                              if imm1>i
-                            }
-                            else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
-                                    not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
-                              begin
-                                DebugMsg('Peephole AndLsl2Lsl done', p);
-                                taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
-                                GetNextInstruction(p, hp1);
-                                asml.Remove(p);
-                                p.free;
-                                p:=hp1;
-                                result:=true;
-                              end
-                          end;
-                      end;
                     {
                       change
                       add/sub reg2,reg1,const1
@@ -1911,6 +1609,7 @@ Implementation
                           begin
                             taicpu(hp1).opcode:=A_MLS;
 
+
                             taicpu(hp1).loadreg(3,taicpu(hp1).oper[1]^.reg);
 
                             if taicpu(hp1).ops=2 then
@@ -1921,12 +1620,12 @@ Implementation
                             taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
 
                             DebugMsg('MulSub2MLS done', p);
+                            AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,UsedRegs);
+                            AllocRegBetween(taicpu(hp1).oper[2]^.reg,p,hp1,UsedRegs);
+                            AllocRegBetween(taicpu(hp1).oper[3]^.reg,p,hp1,UsedRegs);
 
                             taicpu(hp1).ops:=4;
-
-                            asml.remove(p);
-                            p.free;
-                            p:=hp1;
+                            RemoveCurrentP(p, hp1); // <-- Is this actually safe? hp1 is not necessarily the next instruction. [Kit]
                           end;
 
                         result:=true;
@@ -1977,204 +1676,13 @@ Implementation
                   end;
 {$endif dummy}
                 A_UXTB:
-                  begin
-                    {
-                      change
-                      uxtb reg2,reg1
-                      strb reg2,[...]
-                      dealloc reg2
-                      to
-                      strb reg1,[...]
-                    }
-                    if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
-                      (taicpu(p).ops=2) and
-                      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
-                      MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
-                      assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
-                      { the reference in strb might not use reg2 }
-                      not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
-                      { reg1 might not be modified inbetween }
-                      not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
-                      begin
-                        DebugMsg('Peephole UxtbStrb2Strb done', p);
-                        taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
-                        GetNextInstruction(p,hp2);
-                        asml.remove(p);
-                        p.free;
-                        p:=hp2;
-                        result:=true;
-                      end
-                    {
-                      change
-                      uxtb reg2,reg1
-                      uxth reg3,reg2
-                      dealloc reg2
-                      to
-                      uxtb reg3,reg1
-                    }
-                    else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
-                      (taicpu(p).ops=2) and
-                      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
-                      MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
-                      (taicpu(hp1).ops = 2) and
-                      MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
-                      RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
-                      { reg1 might not be modified inbetween }
-                      not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
-                      begin
-                        DebugMsg('Peephole UxtbUxth2Uxtb done', p);
-                        taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
-                        asml.remove(hp1);
-                        hp1.free;
-                        result:=true;
-                      end
-                    {
-                      change
-                      uxtb reg2,reg1
-                      uxtb reg3,reg2
-                      dealloc reg2
-                      to
-                      uxtb reg3,reg1
-                    }
-                    else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
-                      (taicpu(p).ops=2) and
-                      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
-                      MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
-                      (taicpu(hp1).ops = 2) and
-                      MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
-                      RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
-                      { reg1 might not be modified inbetween }
-                      not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
-                      begin
-                        DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
-                        taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
-                        asml.remove(hp1);
-                        hp1.free;
-                        result:=true;
-                      end
-                    {
-                      change
-                      uxtb reg2,reg1
-                      and reg3,reg2,#0x*FF
-                      dealloc reg2
-                      to
-                      uxtb reg3,reg1
-                    }
-                    else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
-                      (taicpu(p).ops=2) and
-                      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
-                      MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
-                      (taicpu(hp1).ops=3) and
-                      (taicpu(hp1).oper[2]^.typ=top_const) and
-                      ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
-                      MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
-                      RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
-                      { reg1 might not be modified inbetween }
-                      not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
-                      begin
-                        DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
-                        taicpu(hp1).opcode:=A_UXTB;
-                        taicpu(hp1).ops:=2;
-                        taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
-                        GetNextInstruction(p,hp2);
-                        asml.remove(p);
-                        p.free;
-                        p:=hp2;
-                        result:=true;
-                      end
-                    else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
-                         RemoveSuperfluousMove(p, hp1, 'UxtbMov2Data') then
-                      Result:=true;
-                  end;
+                  Result:=OptPass1UXTB(p);
                 A_UXTH:
-                  begin
-                    {
-                      change
-                      uxth reg2,reg1
-                      strh reg2,[...]
-                      dealloc reg2
-                      to
-                      strh reg1,[...]
-                    }
-                    if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
-                      (taicpu(p).ops=2) and
-                      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
-                      MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
-                      RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
-                      { the reference in strb might not use reg2 }
-                      not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
-                      { reg1 might not be modified inbetween }
-                      not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
-                      begin
-                        DebugMsg('Peephole UXTHStrh2Strh done', p);
-                        taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
-                        GetNextInstruction(p, hp1);
-                        asml.remove(p);
-                        p.free;
-                        p:=hp1;
-                        result:=true;
-                      end
-                    {
-                      change
-                      uxth reg2,reg1
-                      uxth reg3,reg2
-                      dealloc reg2
-                      to
-                      uxth reg3,reg1
-                    }
-                    else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
-                      (taicpu(p).ops=2) and
-                      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
-                      MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
-                      (taicpu(hp1).ops=2) and
-                      MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
-                      RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
-                      { reg1 might not be modified inbetween }
-                      not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
-                      begin
-                        DebugMsg('Peephole UxthUxth2Uxth done', p);
-                        taicpu(hp1).opcode:=A_UXTH;
-                        taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
-                        GetNextInstruction(p, hp1);
-                        asml.remove(p);
-                        p.free;
-                        p:=hp1;
-                        result:=true;
-                      end
-                    {
-                      change
-                      uxth reg2,reg1
-                      and reg3,reg2,#65535
-                      dealloc reg2
-                      to
-                      uxth reg3,reg1
-                    }
-                    else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
-                      (taicpu(p).ops=2) and
-                      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
-                      MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
-                      (taicpu(hp1).ops=3) and
-                      (taicpu(hp1).oper[2]^.typ=top_const) and
-                      ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
-                      MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
-                      RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
-                      { reg1 might not be modified inbetween }
-                      not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
-                      begin
-                        DebugMsg('Peephole UxthAndImm2Uxth done', p);
-                        taicpu(hp1).opcode:=A_UXTH;
-                        taicpu(hp1).ops:=2;
-                        taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
-                        GetNextInstruction(p, hp1);
-                        asml.remove(p);
-                        p.free;
-                        p:=hp1;
-                        result:=true;
-                      end
-                    else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
-                         RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
-                      Result:=true;
-                  end;
+                  Result:=OptPass1UXTH(p);
+                A_SXTB:
+                  Result:=OptPass1SXTB(p);
+                A_SXTH:
+                  Result:=OptPass1SXTH(p);
                 A_CMP:
                   begin
                     {
@@ -2282,6 +1790,8 @@ Implementation
                         DebugMsg('Peephole VMovVMov2VMov done', p);
                       end;
                   end;
+                A_AND:
+                  Result:=OptPass1And(p);
                 A_VLDR,
                 A_VADD,
                 A_VMUL,
@@ -2534,7 +2044,7 @@ Implementation
           exit;
         regtype:=getregtype(reg);
         supreg:=getsupreg(reg);
-        if (cg.rg[regtype].live_end[supreg]=hp1) and
+        if assigned(cg.rg[regtype]) and (cg.rg[regtype].live_end[supreg]=hp1) and
           RegInInstruction(reg,p) then
           cg.rg[regtype].live_end[supreg]:=p;
       end;
@@ -2549,7 +2059,7 @@ Implementation
           exit;
         regtype:=getregtype(reg);
         supreg:=getsupreg(reg);
-        if (cg.rg[regtype].live_start[supreg]=p) and
+        if assigned(cg.rg[regtype]) and (cg.rg[regtype].live_start[supreg]=p) and
           RegInInstruction(reg,hp1) then
          cg.rg[regtype].live_start[supreg]:=hp1;
       end;
@@ -2657,7 +2167,11 @@ Implementation
             ) and
             GetNextInstruction(hp1,hp2) and
             (hp2.typ=ait_instruction) and
-            { loaded register used by next instruction? }
+            { loaded register used by next instruction?
+
+              if we ever support labels here (they could be skipped in theory), the gnu2 tls general-dynamic code could get broken (the ldr before
+              the bl may not be scheduled away from the bl), so this case would need to be taken care of
+            }
             (RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
             { loaded register not used by previous instruction? }
             not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and

+ 75 - 43
compiler/arm/cgcpu.pas

@@ -276,7 +276,7 @@ unit cgcpu;
       begin
         inherited init_register_allocators;
         { currently, we always save R14, so we can use it }
-        if (target_info.system<>system_arm_darwin) then
+        if (target_info.system<>system_arm_ios) then
             begin
               if assigned(current_procinfo) and (current_procinfo.framepointer<>NR_R11) then
                 rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
@@ -2084,7 +2084,7 @@ unit cgcpu;
              begin
                reference_reset(ref,4,[]);
                if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
-                 (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) then
+                 (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
                  begin
                    if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                      begin
@@ -2115,14 +2115,16 @@ unit cgcpu;
                    begin
                      ref.index:=ref.base;
                      ref.base:=NR_NO;
-                     { FSTMX is deprecated on ARMv6 and later }
-                     {if (current_settings.cputype<cpu_armv6) then
-                       postfix:=PF_IAX
-                     else
-                       postfix:=PF_IAD;}
                      if mmregs<>[] then
                        list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                    end
+                 else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
+                   begin
+                     ref.index:=ref.base;
+                     ref.base:=NR_NO;
+                     if mmregs<>[] then
+                       list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFS,mmregs));
+                   end
                  else
                    internalerror(2019050923);
                end;
@@ -2176,7 +2178,7 @@ unit cgcpu;
                         }
                       end;
                 end;
-              else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
+              else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                 begin
                   { restore vfp registers? }
                   { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
@@ -2193,7 +2195,7 @@ unit cgcpu;
               begin
                 reference_reset(ref,4,[]);
                 if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
-                   (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) then
+                   (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
                   begin
                     if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                       begin
@@ -2223,13 +2225,15 @@ unit cgcpu;
                     begin
                       ref.index:=ref.base;
                       ref.base:=NR_NO;
-                      { FLDMX is deprecated on ARMv6 and later }
-                      {if (current_settings.cputype<cpu_armv6) then
-                        mmpostfix:=PF_IAX
-                      else
-                        mmpostfix:=PF_IAD;}
-                     if mmregs<>[] then
-                       list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
+                      if mmregs<>[] then
+                        list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
+                    end
+                  else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
+                    begin
+                      ref.index:=ref.base;
+                      ref.base:=NR_NO;
+                      if mmregs<>[] then
+                        list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFS,mmregs));
                     end
                   else
                     internalerror(2019050921);
@@ -2483,7 +2487,7 @@ unit cgcpu;
         indirection_done:=false;
         if assigned(ref.symbol) then
           begin
-            if (target_info.system=system_arm_darwin) and
+            if (target_info.system=system_arm_ios) and
                (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN,AB_COMMON]) then
               begin
                 tmpreg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
@@ -2493,6 +2497,17 @@ unit cgcpu;
               end
             else if ref.refaddr=addr_gottpoff then
               current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
+            else if ref.refaddr=addr_tlsgd then
+              current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset))
+            else if ref.refaddr=addr_tlsdesc then
+              current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset))
+            else if ref.refaddr=addr_tpoff then
+              begin
+                if assigned(ref.relsymbol) or (ref.offset<>0) then
+                  Internalerror(2019092804);
+
+                current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
+              end
             else if (cs_create_pic in current_settings.moduleswitches) then
               if (tf_pic_uses_got in target_info.flags) then
                 current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
@@ -2600,9 +2615,9 @@ unit cgcpu;
         paraloc1.init;
         paraloc2.init;
         paraloc3.init;
-        paramanager.getintparaloc(list,pd,1,paraloc1);
-        paramanager.getintparaloc(list,pd,2,paraloc2);
-        paramanager.getintparaloc(list,pd,3,paraloc3);
+        paramanager.getcgtempparaloc(list,pd,1,paraloc1);
+        paramanager.getcgtempparaloc(list,pd,2,paraloc2);
+        paramanager.getcgtempparaloc(list,pd,3,paraloc3);
         a_load_const_cgpara(list,OS_SINT,len,paraloc3);
         a_loadaddr_ref_cgpara(list,dest,paraloc2);
         a_loadaddr_ref_cgpara(list,source,paraloc1);
@@ -3090,11 +3105,12 @@ unit cgcpu;
         list.concat(instr);
         case instr.opcode of
           A_VMOV:
+            { VMOV cannot generate an FPU exception, so we do not need a check here }
             add_move_instruction(instr);
           else
-            ;
+            { VCVT can generate an exception }
+            maybe_check_for_fpu_exception(list);
         end;
-        maybe_check_for_fpu_exception(list);
       end;
 
 
@@ -3154,13 +3170,10 @@ unit cgcpu;
             end;
           end
         else
-          begin
-             handle_load_store(list,A_VLDR,PF_None,tmpmmreg,ref);
-          end;
+          handle_load_store(list,A_VLDR,PF_None,tmpmmreg,ref);
 
         if (tmpmmreg<>reg) then
           a_loadmm_reg_reg(list,fromsize,tosize,tmpmmreg,reg,shuffle);
-        maybe_check_for_fpu_exception(list);
       end;
 
 
@@ -3223,10 +3236,8 @@ unit cgcpu;
             end;
           end
         else
-          begin
-             handle_load_store(list,A_VSTR,PF_None,tmpmmreg,ref);
-          end;
-        maybe_check_for_fpu_exception(list);
+          handle_load_store(list,A_VSTR,PF_None,tmpmmreg,ref);
+        { VSTR cannot generate an FPU exception, VCVT is handled separately, so we do not need a check here }
       end;
 
 
@@ -3242,7 +3253,7 @@ unit cgcpu;
            not shufflescalar(shuffle) then
           internalerror(2009112516);
         list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
-        maybe_check_for_fpu_exception(list);
+        { VMOV cannot generate an FPU exception, so we do not need a check here }
       end;
 
 
@@ -3258,7 +3269,7 @@ unit cgcpu;
            not shufflescalar(shuffle) then
           internalerror(2009112514);
         list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
-        maybe_check_for_fpu_exception(list);
+        { VMOV cannot generate an FPU exception, so we do not need a check here }
       end;
 
 
@@ -3339,10 +3350,13 @@ unit cgcpu;
 
     procedure tbasecgarm.g_maybe_tls_init(list : TAsmList);
       begin
-        list.concat(tai_regalloc.alloc(NR_R0,nil));
-        a_call_name(list,'fpc_read_tp',false);
-        a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,current_procinfo.tlsoffset);
-        list.concat(tai_regalloc.dealloc(NR_R0,nil));
+        if pi_needs_tls in current_procinfo.flags then
+          begin
+            list.concat(tai_regalloc.alloc(NR_R0,nil));
+            a_call_name(list,'fpc_read_tp',false);
+            a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,current_procinfo.tlsoffset);
+            list.concat(tai_regalloc.dealloc(NR_R0,nil));
+          end;
       end;
 
 
@@ -3396,7 +3410,7 @@ unit cgcpu;
         if (mmsize<>OS_F64) then
           internalerror(2009112405);
         list.concat(taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi));
-        cg.maybe_check_for_fpu_exception(list);
+        { VMOV cannot generate an FPU exception, so we do not need a check here }
       end;
 
 
@@ -3407,7 +3421,7 @@ unit cgcpu;
         if (mmsize<>OS_F64) then
           internalerror(2009112406);
         list.concat(taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg));
-        cg.maybe_check_for_fpu_exception(list);
+        { VMOV cannot generate an FPU exception, so we do not need a check here }
       end;
 
 
@@ -4306,7 +4320,7 @@ unit cgcpu;
       begin
         inherited init_register_allocators;
         { currently, we save R14 always, so we can use it }
-        if (target_info.system<>system_arm_darwin) then
+        if (target_info.system<>system_arm_ios) then
           rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
               [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
                RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[])
@@ -4318,12 +4332,19 @@ unit cgcpu;
         rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
             [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);
 
-        if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
+        if (FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype]) and
+          (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) then
           rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
               [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
                RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
                RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
               ],first_mm_imreg,[])
+        else if (FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype]) then
+          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFS,
+              [RS_S0,RS_S1,RS_S2,RS_S3,RS_S4,RS_S5,RS_S6,RS_S7,
+               RS_S16,RS_S17,RS_S18,RS_S19,RS_S20,RS_S21,RS_S22,RS_S23,RS_S24,RS_S25,RS_S26,RS_S27,RS_S28,RS_S29,RS_S30,RS_S31,
+               RS_S8,RS_S9,RS_S10,RS_S11,RS_S12,RS_S13,RS_S14,RS_S15
+              ],first_mm_imreg,[])
         else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
           rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
               [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
@@ -5126,6 +5147,17 @@ unit cgcpu;
 
                 if ref.refaddr=addr_gottpoff then
                   current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
+                else if ref.refaddr=addr_tlsgd then
+                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset))
+                else if ref.refaddr=addr_tlsdesc then
+                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset))
+                else if ref.refaddr=addr_tpoff then
+                  begin
+                    if assigned(ref.relsymbol) or (ref.offset<>0) then
+                      Internalerror(2019092805);
+
+                    current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
+                  end
                 else
                   current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
 
@@ -5238,7 +5270,7 @@ unit cgcpu;
             instr:=setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32);
             list.Concat(instr);
             add_move_instruction(instr);
-            maybe_check_for_fpu_exception(list);
+            { VMOV cannot generate an FPU exception, so we do not need a check here }
           end
         else if (fromsize=OS_F64) and
           (tosize=OS_F64) then
@@ -5264,7 +5296,7 @@ unit cgcpu;
     procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
       begin
         handle_load_store(list,A_VSTR,PF_None,reg,ref);
-        maybe_check_for_fpu_exception(list);
+        { VSTR cannot generate an FPU exception, so we do not need a check here }
       end;
 
 
@@ -5284,7 +5316,7 @@ unit cgcpu;
           (fromsize=OS_F32) then
           begin
             list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
-            maybe_check_for_fpu_exception(list);
+            { VMOV cannot generate an FPU exception, so we do not need a check here }
           end
         else
           internalerror(2012100814);

+ 28 - 5
compiler/arm/cpubase.pas

@@ -113,9 +113,6 @@ unit cpubase;
 
       VOLATILE_INTREGISTERS_DARWIN = [RS_R0..RS_R3,RS_R9,RS_R12..RS_R14];
 
-    type
-      totherregisterset = set of tregisterindex;
-
 {*****************************************************************************
                           Instruction post fixes
 *****************************************************************************}
@@ -368,6 +365,9 @@ unit cpubase;
     function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function conditions_equal(const c1, c2: TAsmCond): boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal") }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+
     procedure shifterop_reset(var so : tshifterop); {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function is_pc(const r : tregister) : boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
@@ -446,7 +446,7 @@ unit cpubase;
             begin
               case getsubreg(reg) of
                 R_SUBFD,
-                R_SUBWHOLE:
+                R_SUBMMWHOLE:
                   result:=OS_F64;
                 R_SUBFS:
                   result:=OS_F32;
@@ -543,6 +543,26 @@ unit cpubase;
       end;
 
 
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal") }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+      begin
+        Result := (c = C_None) or conditions_equal(Subset, c);
+
+        { Please update as necessary. [Kit] }
+        if not Result then
+          case Subset of
+            C_EQ:
+              Result := (c in [C_GE, C_LE]);
+            C_LT:
+              Result := (c in [C_LE]);
+            C_GT:
+              Result := (c in [C_GE]);
+            else
+              Result := False;
+          end;
+      end;
+
+
     function is_shifter_const(d : aint;var imm_shift : byte) : boolean;
       var
          i : longint;
@@ -615,6 +635,9 @@ unit cpubase;
           end;
       end;
     
+{$push}
+{ Disable range and overflow checking here }
+{$R-}{$Q-}        
     function is_continuous_mask(d : aword;var lsb, width: byte) : boolean;
       var
         msb : byte;
@@ -623,9 +646,9 @@ unit cpubase;
         msb:=BsrDword(d);
         
         width:=msb-lsb+1;
-        
         result:=(lsb<>255) and (msb<>255) and (aword(((1 shl (msb-lsb+1))-1) shl lsb) = d);
       end;
+{$pop}
 
 
     function split_into_shifter_const(value : aint;var imm1: dword; var imm2: dword) : boolean;

+ 21 - 5
compiler/arm/cpuelf.pas

@@ -28,7 +28,7 @@ interface
 implementation
 
   uses
-    globtype,cutils,cclasses,
+    globtype,globals,cutils,cclasses,
     verbose, elfbase,
     systems,aasmbase,ogbase,ogelf,assemble;
 
@@ -335,9 +335,24 @@ implementation
           result:=R_ARM_THM_CALL;
         RELOC_GOT32:
           result:=R_ARM_GOT_BREL;
+        RELOC_TPOFF:
+          if current_settings.tlsmodel=tlsm_initial_exec then
+            result:=R_ARM_TLS_IE32
+          else if current_settings.tlsmodel=tlsm_local_exec then
+            result:=R_ARM_TLS_LE32
+          else
+            Internalerror(2019092901);
+        RELOC_TLSGD:
+          result:=R_ARM_TLS_GD32;
+        RELOC_TLSDESC:
+          result:=R_ARM_TLS_GOTDESC;
+        RELOC_TLS_CALL:
+          result:=R_ARM_TLS_CALL;
+        RELOC_ARM_CALL:
+          result:=R_ARM_CALL;
+        RELOC_DTPOFF:
+          result:=R_ARM_TLS_LDO32;
       else
-        result:=0;
-        writeln(objrel.typ);
         InternalError(2012110602);
       end;
     end;
@@ -956,12 +971,13 @@ implementation
          idtxt  : 'ELF';
          asmbin : '';
          asmcmd : '';
-         supported_targets : [system_arm_embedded,system_arm_darwin,
+         supported_targets : [system_arm_embedded,system_arm_ios,
                               system_arm_linux,system_arm_netbsd,
                               system_arm_gba,system_arm_nds,
-                              system_arm_aros];
+                              system_arm_aros,system_arm_freertos];
          flags : [af_outputbinary,af_smartlink_sections,af_supports_dwarf];
          labelprefix : '.L';
+         labelmaxlen : -1;
          comment : '';
          dollarsign: '$';
        );

+ 44 - 26
compiler/arm/cpuinfo.pas

@@ -54,6 +54,9 @@ Type
        cpu_armv7r,
        cpu_armv7m,
        cpu_armv7em
+       { when new elements are added afterwards,
+         update class procedure tarmnodeutils.InsertObjectInfo; in narmutil.pas
+       }
       );
 
    tinstructionset = (is_thumb,is_arm);
@@ -70,17 +73,16 @@ Type
       fpu_vfpv3,
       fpu_neon_vfpv3,
       fpu_vfpv3_d16,
-      fpu_fpv4_s16,
+      fpu_fpv4_s16,     { same as fpu_fpv4_sp_d16, kept for backwards compatibility }
       fpu_vfpv4,
+      fpu_fpv4_sp_d16,  { 32 registers single precision, for load/store/move they can be accessed as 16 double registers }
       fpu_neon_vfpv4
-      { when new elements added afterwards, update also fpu_vfp_last below }
+      { when new elements are added afterwards, update
+        class procedure tarmnodeutils.InsertObjectInfo; in narmutil.pas }
      );
 
 Const
-   fpu_vfp_first = fpu_vfpv2;
-   fpu_vfp_last  = fpu_neon_vfpv4;
-
-  fputypestrllvm : array[tfputype] of string[14] = ('',
+  fputypestrllvm : array[tfputype] of string[15] = ('',
     '',
     '',
     '',
@@ -92,6 +94,7 @@ Const
     'fpu=vfpv3-d16',
     'fpu=vfpv4-s16',
     'fpu=vfpv4',
+    'fpu=fpv4-sp-d16',
     'fpu=neon-vfpv4'
   );
 
@@ -344,6 +347,9 @@ Type
       ct_stm32f756xe,
       ct_stm32f756xg,
 
+      ct_stm32g071rb,
+      ct_nucleog071rb,
+
       { TI - Fury Class - 64 K Flash, 16 K SRAM Devices }
       ct_lm3s1110,
       ct_lm3s1133,
@@ -504,6 +510,9 @@ Type
       ct_nrf52832_xxaa,
       ct_nrf52840_xxaa,
 
+      { Raspberry Pi 2 }
+      ct_raspi2,
+
       // generic Thumb2 target
       ct_thumb2bare
      );
@@ -563,7 +572,8 @@ Const
      'ARMV7EM'
    );
 
-   fputypestr : array[tfputype] of string[10] = ('',
+   fputypestr : array[tfputype] of string[11] = (
+     'NONE',
      'SOFT',
      'LIBGCC',
      'FPA',
@@ -575,6 +585,7 @@ Const
      'VFPV3_D16',
      'FPV4_S16',
      'VFPV4',
+     'FPV4_SP_D16',
      'NEON_VFPV4'
    );
 
@@ -811,8 +822,8 @@ Const
       (controllertypestr:'STM32F401RD';     controllerunitstr:'STM32F401XX';      cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00060000; srambase:$20000000; sramsize:$00018000),
       (controllertypestr:'STM32F401VD';     controllerunitstr:'STM32F401XX';      cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00060000; srambase:$20000000; sramsize:$00018000),
       (controllertypestr:'STM32F401CE';     controllerunitstr:'STM32F401XX';      cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00080000; srambase:$20000000; sramsize:$00018000),
-      (controllertypestr:'STM32F401RE';     controllerunitstr:'STM32F401XX';      cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00080000; srambase:$20000000; sramsize:$00018000),
-      (controllertypestr:'NUCLEOF401RE';    controllerunitstr:'STM32F401XX';      cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00080000; srambase:$20000000; sramsize:$00018000),
+      (controllertypestr:'STM32F401RE';     controllerunitstr:'STM32F401XE';      cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00080000; srambase:$20000000; sramsize:$00018000),
+      (controllertypestr:'NUCLEOF401RE';    controllerunitstr:'STM32F401XE';      cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00080000; srambase:$20000000; sramsize:$00018000),
       (controllertypestr:'STM32F401VE';     controllerunitstr:'STM32F401XX';      cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00080000; srambase:$20000000; sramsize:$00018000),
       (controllertypestr:'STM32F407VG';     controllerunitstr:'STM32F407XX';      cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00100000; srambase:$20000000; sramsize:$00020000),
       (controllertypestr:'DISCOVERYF407VG'; controllerunitstr:'STM32F407XX';      cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00100000; srambase:$20000000; sramsize:$00020000),
@@ -862,6 +873,9 @@ Const
       (controllertypestr:'STM32F756XE';     controllerunitstr:'STM32F756';        cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00080000; srambase:$20010000; sramsize:$00040000),
       (controllertypestr:'STM32F756XG';     controllerunitstr:'STM32F756';        cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00100000; srambase:$20010000; sramsize:$00040000),
 
+      (controllertypestr:'STM32G071RB'         ; controllerunitstr:'STM32G071XX'         ; cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00020000; srambase:$20000000; sramsize:$00009000),
+      (controllertypestr:'NUCLEOG071RB'        ; controllerunitstr:'STM32G071XX'         ; cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00020000; srambase:$20000000; sramsize:$00009000),
+
       (controllertypestr:'LM3S1110';	controllerunitstr:'LM3FURY';	cputype:cpu_armv7m; fputype:fpu_soft; flashbase:$00000000;	flashsize:$00010000;	srambase:$20000000;	sramsize:$00004000),
       (controllertypestr:'LM3S1133';	controllerunitstr:'LM3FURY';	cputype:cpu_armv7m; fputype:fpu_soft; flashbase:$00000000;	flashsize:$00010000;	srambase:$20000000;	sramsize:$00004000),
       (controllertypestr:'LM3S1138';	controllerunitstr:'LM3FURY';	cputype:cpu_armv7m; fputype:fpu_soft; flashbase:$00000000;	flashsize:$00010000;	srambase:$20000000;	sramsize:$00004000),
@@ -1020,6 +1034,9 @@ Const
       (controllertypestr:'NRF52832_XXAA'; controllerunitstr:'NRF52'; cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$00000000; flashsize:$00080000; srambase:$20000000; sramsize:$00010000),
       (controllertypestr:'NRF52840_XXAA'; controllerunitstr:'NRF52'; cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$00000000; flashsize:$00080000; srambase:$20000000; sramsize:$00010000),
       
+      { Raspberry Pi 2 }
+      (controllertypestr:'RASPI2'; controllerunitstr:'RASPI2'; cputype:cpu_armv7a; fputype:fpu_vfpv4; flashbase:$00000000; flashsize:$00000000; srambase:$00008000; sramsize:$10000000),
+
       { Bare bones }
       (controllertypestr:'THUMB2_BARE';	controllerunitstr:'THUMB2_BARE';	cputype:cpu_armv7m; fputype:fpu_soft; flashbase:$00000000;	flashsize:$00002000;	srambase:$20000000;	sramsize:$00000400)
     );
@@ -1030,12 +1047,12 @@ Const
                                  genericlevel3optimizerswitches-
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
-                                 [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,
+                                 [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_loopunroll,cs_opt_tailrecursion,
                                   cs_opt_stackframe,cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath,cs_opt_forcenostackframe];
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
-     [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
+     [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [cs_opt_scheduler{,cs_opt_loopunroll}];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
 
@@ -1060,9 +1077,9 @@ Const
    tfpuflags =
       (
         FPUARM_HAS_FPA,                { fpu is an fpa based FPU                                                               }
-        FPUARM_HAS_VFP_EXTENSION,      { fpu is a vfp extension                                                                }
+        FPUARM_HAS_VFP_EXTENSION,      { fpu is a vfp extension, it means at least single operation support                    }
         FPUARM_HAS_VFP_DOUBLE,         { vfp has double support                                                                }
-        FPUARM_HAS_VFP_SINGLE_ONLY,    { vfp has only single support, disjunct to FPUARM_HAS_VFP_DOUBLE, for error checking    }
+        FPUARM_HAS_VFP_DOUBLE_MOVLDST, { vfp has only single support, but MOV, LD, ST can be done on pairs as double           }
         FPUARM_HAS_32REGS,             { vfp has 32 regs, without this flag, 16 are assumed                                    }
         FPUARM_HAS_VMOV_CONST,         { vmov supports (some) real constants                                                   }
         FPUARM_HAS_EXCEPTION_TRAPPING, { vfp does exceptions trapping                                                          }
@@ -1094,19 +1111,20 @@ Const
      );
 
      fpu_capabilities : array[tfputype] of set of tfpuflags =
-       ( { fpu_none       } [],
-         { fpu_soft       } [],
-         { fpu_libgcc     } [],
-         { fpu_fpa        } [FPUARM_HAS_FPA],
-         { fpu_fpa10      } [FPUARM_HAS_FPA],
-         { fpu_fpa11      } [FPUARM_HAS_FPA],
-         { fpu_vfpv2      } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE],
-         { fpu_vfpv3      } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST],
-         { fpu_neon_vfpv3 } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_NEON],
-         { fpu_vfpv3_d16  } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_VMOV_CONST],
-         { fpu_fpv4_s16   } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_SINGLE_ONLY,FPUARM_HAS_VMOV_CONST],
-         { fpu_vfpv4      } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_FMA],
-         { fpu_neon_vfpv4 } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_NEON,FPUARM_HAS_FMA]
+       ( { fpu_none         } [],
+         { fpu_soft         } [],
+         { fpu_libgcc       } [],
+         { fpu_fpa          } [FPUARM_HAS_FPA],
+         { fpu_fpa10        } [FPUARM_HAS_FPA],
+         { fpu_fpa11        } [FPUARM_HAS_FPA],
+         { fpu_vfpv2        } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE],
+         { fpu_vfpv3        } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST],
+         { fpu_neon_vfpv3   } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_NEON],
+         { fpu_vfpv3_d16    } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_VMOV_CONST],
+         { fpu_fpv4_s16     } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_32REGS,FPUARM_HAS_VFP_DOUBLE_MOVLDST,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_FMA],
+         { fpu_vfpv4        } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_FMA],
+         { fpu_fpv4_sp_d16  } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_32REGS,FPUARM_HAS_VFP_DOUBLE_MOVLDST,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_FMA],
+         { fpu_neon_vfpv4   } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_NEON,FPUARM_HAS_FMA]
        );
 
    { contains all CPU supporting any kind of thumb instruction set }

+ 2 - 1
compiler/arm/cpunode.pas

@@ -46,7 +46,8 @@ unit cpunode;
        narmcnv,
        narmcon,
        narmset,
-       narmmem
+       narmmem,
+       narmutil
 {$else}
        llvmnode
 {$endif}

+ 7 - 7
compiler/arm/cpupara.pas

@@ -40,7 +40,7 @@ unit cpupara;
           function get_saved_registers_int(calloption : tproccalloption):tcpuregisterarray;override;
           function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
           function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
-          procedure getintparaloc(list: TAsmList; pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);override;
+          procedure getcgtempparaloc(list: TAsmList; pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_varargs_paraloc_info(p : tabstractprocdef; side: tcallercallee; varargspara:tvarargsparalist):longint;override;
           function get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
@@ -59,14 +59,14 @@ unit cpupara;
 
     uses
        verbose,systems,cutils,
-       defutil,symsym,symcpu,symtable,
+       defutil,symsym,symcpu,symtable,symutil,
        { PowerPC uses procinfo as well in cpupara, so this should not hurt }
        procinfo;
 
 
     function tcpuparamanager.get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;
       begin
-        if (target_info.system<>system_arm_darwin) then
+        if (target_info.system<>system_arm_ios) then
           result:=VOLATILE_INTREGISTERS
         else
           result:=VOLATILE_INTREGISTERS_DARWIN;
@@ -94,7 +94,7 @@ unit cpupara;
       end;
 
 
-    procedure tcpuparamanager.getintparaloc(list: TAsmList; pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
+    procedure tcpuparamanager.getcgtempparaloc(list: TAsmList; pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
       var
         paraloc : pcgparalocation;
         psym : tparavarsym;
@@ -149,7 +149,7 @@ unit cpupara;
                 getparaloc:=LOC_MMREGISTER
               else if (calloption in cdecl_pocalls) or
                  (cs_fp_emulation in current_settings.moduleswitches) or
-                 (current_settings.fputype in [fpu_vfp_first..fpu_vfp_last]) then
+                 (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
                 { the ARM eabi also allows passing VFP values via VFP registers,
                   but Mac OS X doesn't seem to do that and linux only does it if
                   built with the "-mfloat-abi=hard" option }
@@ -294,7 +294,7 @@ unit cpupara;
                   for i:=0 to trecorddef(def).symtable.SymList.count-1 do
                     begin
                       sym:=tsym(trecorddef(def).symtable.SymList[i]);
-                      if sym.typ<>fieldvarsym then
+                      if not is_normal_fieldvarsym(sym) then
                         continue;
                       { bitfield -> ignore }
                       if (trecordsymtable(trecorddef(def).symtable).usefieldalignment=bit_alignment) and
@@ -782,7 +782,7 @@ unit cpupara;
               end
             else if (p.proccalloption in [pocall_softfloat]) or
                (cs_fp_emulation in current_settings.moduleswitches) or
-               (current_settings.fputype in [fpu_vfp_first..fpu_vfp_last]) then
+               (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
               begin
                 case retcgsize of
                   OS_64,

+ 1 - 1
compiler/arm/cpupi.pas

@@ -88,7 +88,7 @@ unit cpupi;
           end;
         if tg.direction = -1 then
           begin
-            if (target_info.system<>system_arm_darwin) then
+            if (target_info.system<>system_arm_ios) then
               { Non-Darwin, worst case: r4-r10,r11,r13,r14,r15 is saved -> -28-16, but we
                 always adjust the frame pointer to point to the first stored
                 register (= last register in list above) -> + 4 }

+ 7 - 0
compiler/arm/cputarg.pas

@@ -62,9 +62,16 @@ implementation
     {$ifndef NOTARGETBSD}
       ,t_bsd
     {$endif}
+    {$ifndef NOTARGETDARWIN}
+      ,t_darwin
+    {$endif}
     {$ifndef NOTARGETAROS}
       ,t_aros
     {$endif}
+    {$ifndef NOTARGETFREERTOS}
+      ,t_freertos
+    {$endif}
+
 
 {**************************************
              Assemblers

+ 7 - 65
compiler/arm/narmadd.pas

@@ -240,7 +240,7 @@ interface
                  location.register,left.location.register,right.location.register),pf));
               cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
             end
-          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+          else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
             begin
               { force mmreg as location, left right doesn't matter
                 as both will be in a fpureg }
@@ -326,7 +326,7 @@ interface
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VMRS,NR_APSR_nzcv,NR_FPSCR));
               location.resflags:=GetFpuResFlags;
             end
-          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+          else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
             begin
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
@@ -576,77 +576,19 @@ interface
           end;
       end;
 
+
     function tarmaddnode.first_addfloat: tnode;
-      var
-        procname: string[31];
-        { do we need to reverse the result ? }
-        notnode : boolean;
-        fdef : tdef;
       begin
         result := nil;
-        notnode := false;
 
-        if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+        if (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) and
+           not(FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) then
           begin
             case tfloatdef(left.resultdef).floattype of
               s32real:
-                begin
-                  result:=nil;
-                  notnode:=false;
-                end;
+                ;
               s64real:
-                begin
-                  fdef:=search_system_type('FLOAT64').typedef;
-                  procname:='float64';
-
-                  case nodetype of
-                    addn:
-                      procname:=procname+'_add';
-                    muln:
-                      procname:=procname+'_mul';
-                    subn:
-                      procname:=procname+'_sub';
-                    slashn:
-                      procname:=procname+'_div';
-                    ltn:
-                      procname:=procname+'_lt';
-                    lten:
-                      procname:=procname+'_le';
-                    gtn:
-                      begin
-                        procname:=procname+'_lt';
-                        swapleftright;
-                      end;
-                    gten:
-                      begin
-                        procname:=procname+'_le';
-                        swapleftright;
-                      end;
-                    equaln:
-                      procname:=procname+'_eq';
-                    unequaln:
-                      begin
-                        procname:=procname+'_eq';
-                        notnode:=true;
-                      end;
-                    else
-                      CGMessage3(type_e_operator_not_supported_for_types,node2opstr(nodetype),left.resultdef.typename,right.resultdef.typename);
-                  end;
-
-                  if nodetype in [ltn,lten,gtn,gten,equaln,unequaln] then
-                    resultdef:=pasbool1type;
-                  result:=ctypeconvnode.create_internal(ccallnode.createintern(procname,ccallparanode.create(
-                      ctypeconvnode.create_internal(right,fdef),
-                      ccallparanode.create(
-                        ctypeconvnode.create_internal(left,fdef),nil))),resultdef);
-
-                  left:=nil;
-                  right:=nil;
-
-                  { do we need to reverse the result }
-                  if notnode then
-                    result:=cnotnode.create(result);
-                end;
+                result:=first_addfloat_soft;
               else
                 internalerror(2019050933);
             end;

+ 1 - 1
compiler/arm/narmcal.pas

@@ -83,7 +83,7 @@ implementation
          (target_info.abi<>abi_eabihf) and
          (procdefinition.proccalloption<>pocall_hardfloat) and
          ((cs_fp_emulation in current_settings.moduleswitches) or
-          (current_settings.fputype in [fpu_vfp_first..fpu_vfp_last])) then
+          (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype])) then
         begin
           { keep the fpu values in integer registers for now, the code
             generator will move them to memory or an mmregister when necessary

+ 12 - 25
compiler/arm/narmcnv.pas

@@ -32,33 +32,17 @@ interface
        tarmtypeconvnode = class(tcgtypeconvnode)
          protected
            function first_int_to_real: tnode;override;
-           function first_real_to_real: tnode; override;
-         { procedure second_int_to_int;override; }
-         { procedure second_string_to_string;override; }
-         { procedure second_cstring_to_pchar;override; }
-         { procedure second_string_to_chararray;override; }
-         { procedure second_array_to_pointer;override; }
-         // function first_int_to_real: tnode; override;
-         { procedure second_pointer_to_array;override; }
-         { procedure second_chararray_to_string;override; }
-         { procedure second_char_to_string;override; }
+           function first_real_to_real: tnode;override;
            procedure second_int_to_real;override;
-         // procedure second_real_to_real;override;
-         { procedure second_cord_to_pointer;override; }
-         { procedure second_proc_to_procvar;override; }
-         { procedure second_bool_to_int;override; }
            procedure second_int_to_bool;override;
-         { procedure second_load_smallset;override;  }
-         { procedure second_ansistring_to_pchar;override; }
-         { procedure second_pchar_to_string;override; }
-         { procedure second_class_to_intf;override; }
-         { procedure second_char_to_char;override; }
        end;
 
 implementation
 
    uses
-      verbose,globtype,globals,symdef,aasmbase,aasmtai,aasmdata,symtable,
+      verbose,globtype,globals,
+      systems,
+      symdef,aasmbase,aasmtai,aasmdata,symtable,
       defutil,
       cgbase,cgutils,
       pass_1,pass_2,procinfo,ncal,
@@ -78,7 +62,8 @@ implementation
 {$ifdef cpufpemu}
           (current_settings.fputype=fpu_soft) or
 {$endif cpufpemu}
-          (FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype]) then
+          (not(FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) and
+           not(FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype])) then
           result:=inherited first_int_to_real
         else
           begin
@@ -117,17 +102,19 @@ implementation
               fpu_fpa10,
               fpu_fpa11:
                 expectloc:=LOC_FPUREGISTER;
-              fpu_vfp_first..fpu_vfp_last:
-                expectloc:=LOC_MMREGISTER;
+              else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
+                expectloc:=LOC_MMREGISTER
               else
                 internalerror(2009112702);
             end;
           end;
       end;
 
+
     function tarmtypeconvnode.first_real_to_real: tnode;
       begin
-        if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+        if (current_settings.fputype=fpu_soft) and
+           not (target_info.system in systems_wince) then
           begin
             case tfloatdef(left.resultdef).floattype of
               s32real:
@@ -255,7 +242,7 @@ implementation
                 location.register,left.location.register),
                 signedprec2vfppf[signed,location.size]));
             end
-          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+          else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
             begin
               location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
               signed:=left.location.size=OS_S32;

+ 6 - 2
compiler/arm/narmcon.pas

@@ -55,7 +55,9 @@ interface
       begin
         result:=nil;
         if (FPUARM_HAS_VMOV_CONST in fpu_capabilities[current_settings.fputype]) and
-           IsVFPFloatImmediate(tfloatdef(resultdef).floattype,value_real) then
+           IsVFPFloatImmediate(tfloatdef(resultdef).floattype,value_real) and
+           ((tfloatdef(resultdef).floattype=s32real) or
+            (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[init_settings.fputype])) then
            expectloc:=LOC_MMREGISTER
          else
            expectloc:=LOC_CREFERENCE;
@@ -76,7 +78,9 @@ interface
 
       begin
         if (FPUARM_HAS_VMOV_CONST in fpu_capabilities[current_settings.fputype]) and
-          IsVFPFloatImmediate(tfloatdef(resultdef).floattype,value_real) then
+          IsVFPFloatImmediate(tfloatdef(resultdef).floattype,value_real) and
+           ((tfloatdef(resultdef).floattype=s32real) or
+            (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[init_settings.fputype])) then
           begin
             location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
             location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

+ 12 - 12
compiler/arm/narminl.pas

@@ -86,7 +86,12 @@ implementation
                  location.loc := LOC_FPUREGISTER;
                end;
             end;
-          fpu_vfp_first..fpu_vfp_last:
+          fpu_soft:
+            begin
+              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
+              location_copy(location,left.location);
+            end
+          else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
             begin
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               location_copy(location,left.location);
@@ -95,11 +100,6 @@ implementation
                  location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
                  location.loc := LOC_MMREGISTER;
                end;
-            end;
-          fpu_soft:
-            begin
-              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
-              location_copy(location,left.location);
             end
           else
             internalerror(2009111801);
@@ -125,7 +125,7 @@ implementation
                 expectloc:=LOC_FPUREGISTER;
               else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                 expectloc:=LOC_MMREGISTER
-              else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+              else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                 begin
                   if tfloatdef(left.resultdef).floattype=s32real then
                     expectloc:=LOC_MMREGISTER
@@ -153,7 +153,7 @@ implementation
                 expectloc:=LOC_FPUREGISTER;
               else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                 expectloc:=LOC_MMREGISTER
-              else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+              else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                 begin
                   if tfloatdef(left.resultdef).floattype=s32real then
                     expectloc:=LOC_MMREGISTER
@@ -181,7 +181,7 @@ implementation
                 expectloc:=LOC_FPUREGISTER;
               else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                 expectloc:=LOC_MMREGISTER
-              else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+              else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                 begin
                   if tfloatdef(left.resultdef).floattype=s32real then
                     expectloc:=LOC_MMREGISTER
@@ -265,7 +265,7 @@ implementation
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register),pf));
               cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
             end
-          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+          else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
             begin
               current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register), PF_F32));
               cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
@@ -296,7 +296,7 @@ implementation
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register),pf));
               cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
             end
-          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+          else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
             begin
               current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register), PF_F32));
               cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
@@ -327,7 +327,7 @@ implementation
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register),pf));
               cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
             end
-          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+          else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
             begin
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register), PF_F32));
               cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);

+ 93 - 16
compiler/arm/narmld.pas

@@ -41,7 +41,7 @@ implementation
     uses
       globals,verbose,
       cgbase,cgobj,cgutils,
-      aasmdata,
+      aasmdata,aasmcpu,
       systems,
       symcpu,symdef,
       nld,
@@ -67,21 +67,98 @@ implementation
               begin
                 if not(pi_uses_threadvar in current_procinfo.flags) then
                   internalerror(2012012101);
-                current_asmdata.getjumplabel(l);
-                reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_DATA),-8,sizeof(AInt),[]);
-                href.refaddr:=addr_gottpoff;
-                href.relsymbol:=l;
-                hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
-                cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,hregister);
-                cg.a_label(current_asmdata.CurrAsmList,l);
-                reference_reset(href,0,[]);
-                href.base:=NR_PC;
-                href.index:=hregister;
-                hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
-                cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,href,hregister);
-                location.reference.base:=current_procinfo.tlsoffset;
-                location.reference.index:=hregister;
-                handled:=true;
+                case current_settings.tlsmodel of
+                  tlsm_global_dynamic:
+                    begin
+{$ifdef use_tls_dialect_gnu}
+                      current_asmdata.getjumplabel(l);
+                      reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_TLS),-8,sizeof(AInt),[]);
+                      href.refaddr:=addr_tlsgd;
+                      href.relsymbol:=l;
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,hregister);
+                      cg.a_label(current_asmdata.CurrAsmList,l);
+                      cg.getcpuregister(current_asmdata.CurrAsmList,NR_R0);
+                      cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,OS_ADDR,hregister,NR_PC,NR_R0);
+                      cg.g_call(current_asmdata.CurrAsmList,'__tls_get_addr');
+                      cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_R0);
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,NR_R0,hregister);
+                      reference_reset(location.reference,location.reference.alignment,location.reference.volatility);
+                      location.reference.base:=hregister;
+{$else use_tls_dialect_gnu}
+                      { On ARM, we use the gnu2 TLS dialect. It has the advantage that it can be relaxed (optimized) by the linker;
+                        this is not possible with the gnu TLS dialect.
+
+                        gnu2 is proposed and documented in
+                          Glauber de Oliveira Costa, Alexandre Oliva: Speeding Up Thread-Local Storage Access in Dynamic Libraries in the ARM platform, 2006.
+                          Link: https://www.fsfla.org/~lxoliva/writeups/TLS/paper-lk2006.pdf
+                      }
+                      current_asmdata.getjumplabel(l);
+                      reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_TLS),0,sizeof(AInt),[]);
+                      href.refaddr:=addr_tlsdesc;
+                      href.relsymbol:=l;
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.getcpuregister(current_asmdata.CurrAsmList,NR_R0);
+                      cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,NR_R0);
+                      cg.a_label(current_asmdata.CurrAsmList,l);
+
+                      { we have to go the ugly way so we can set addr_tlscall }
+                      cg.allocallcpuregisters(current_asmdata.CurrAsmList);
+                      cg.a_call_name(current_asmdata.CurrAsmList,gvs.mangledname,false);
+                      with taicpu(current_asmdata.CurrAsmList.Last) do
+                        begin
+                          if opcode<>A_BL then
+                            Internalerror(2019092902);
+                          oper[0]^.ref^.refaddr:=addr_tlscall;
+                        end;
+                      cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
+
+                      cg.getcpuregister(current_asmdata.CurrAsmList,NR_R0);
+                      cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_R0);
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,NR_R0,hregister);
+                      reference_reset(location.reference,location.reference.alignment,location.reference.volatility);
+                      location.reference.base:=current_procinfo.tlsoffset;
+                      include(current_procinfo.flags,pi_needs_tls);
+                      location.reference.index:=hregister;
+{$endif use_tls_dialect_gnu}
+                      handled:=true;
+                    end;
+                  tlsm_initial_exec:
+                    begin
+                      current_asmdata.getjumplabel(l);
+                      reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_TLS),-8,sizeof(AInt),[]);
+                      href.refaddr:=addr_tpoff;
+                      href.relsymbol:=l;
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,hregister);
+                      cg.a_label(current_asmdata.CurrAsmList,l);
+                      reference_reset(href,0,[]);
+                      href.base:=NR_PC;
+                      href.index:=hregister;
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,href,hregister);
+                      location.reference.base:=current_procinfo.tlsoffset;
+                      include(current_procinfo.flags,pi_needs_tls);
+                      location.reference.index:=hregister;
+                      handled:=true;
+                    end;
+                  tlsm_local_exec:
+                    begin
+                      reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_TLS),0,sizeof(AInt),[]);
+                      href.refaddr:=addr_tpoff;
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,hregister);
+                      reference_reset(href,0,[]);
+                      location.reference.base:=current_procinfo.tlsoffset;
+                      include(current_procinfo.flags,pi_needs_tls);
+                      location.reference.index:=hregister;
+                      handled:=true;
+                    end;
+                  else
+                    Internalerror(2019092802);
+                end;
               end;
           end;
 

+ 9 - 14
compiler/arm/narmmat.pas

@@ -54,7 +54,7 @@ implementation
       globtype,compinnr,
       cutils,verbose,globals,constexp,
       aasmbase,aasmcpu,aasmtai,aasmdata,
-      defutil,
+      defutil,systems,
       symtype,symconst,symtable,
       cgbase,cgobj,hlcgobj,cgutils,
       pass_2,procinfo,
@@ -358,17 +358,9 @@ implementation
         procname: string[31];
         fdef : tdef;
       begin
-        if (current_settings.fputype=fpu_soft) and
-           (left.resultdef.typ=floatdef) then
-          begin
-            result:=nil;
-            firstpass(left);
-            expectloc:=LOC_REGISTER;
-            exit;
-          end;
-
-        if not(FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype]) or
-          (tfloatdef(resultdef).floattype=s32real) then
+        if (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) or
+           (target_info.system = system_arm_wince) or
+           is_single(resultdef) then
           exit(inherited pass_1);
 
         result:=nil;
@@ -376,7 +368,10 @@ implementation
         if codegenerror then
           exit;
 
-        if (left.resultdef.typ=floatdef) then
+        { if we get here and VFP support is on, there is no 64 bit VFP operation support available,
+          so in this case the software version needs to be called }
+        if (left.resultdef.typ=floatdef) and ((current_settings.fputype=fpu_soft) or
+          (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype])) then
           begin
             case tfloatdef(resultdef).floattype of
               s64real:
@@ -447,7 +442,7 @@ implementation
                 location.register,left.location.register), pf));
               cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
             end
-          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[init_settings.fputype] then
+          else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[init_settings.fputype] then
             begin
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               location:=left.location;

+ 332 - 0
compiler/arm/narmutil.pas

@@ -0,0 +1,332 @@
+{
+    Copyright (c) 2019 by Florian Klämpfl
+
+    ARM version of some node tree helper routines
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit narmutil;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    cclasses,ngenutil;
+
+  type
+    { ARM-specific node utilities: overrides two tnodeutils class procedures }
+    tarmnodeutils = class(tnodeutils)
+      { adds EABI object attributes for arm-linux targets using the EABI/EABIHF ABI }
+      class procedure InsertObjectInfo; override;
+      { emits the unit init/final routines as call sequences when
+        tf_init_final_units_by_calls is set for the target }
+      class procedure insert_init_final_table(entries: tfplist); override;
+    end;
+
+
+  implementation
+
+    uses
+      verbose,
+      systems,
+      globals,
+      cpuinfo,cpubase,
+      cgbase,cgutils,
+      aasmbase,aasmdata,aasmtai,aasmcpu,
+      symdef;
+
+    { Tag numbers for EABI attributes; the ones used in this unit are passed as
+      the first argument of tai_eabi_attribute.create in InsertObjectInfo.
+      Note that Tag_conformance (67) is declared before Tag_T2EE_use (66). }
+    const
+      Tag_File = 1;
+      Tag_Section = 2;
+      Tag_Symbol = 3;
+      Tag_CPU_raw_name = 4;
+      Tag_CPU_name = 5;
+      Tag_CPU_arch = 6;
+      Tag_CPU_arch_profile = 7;
+      Tag_ARM_ISA_use = 8;
+      Tag_THUMB_ISA_use = 9;
+      Tag_FP_Arch = 10;
+      Tag_WMMX_arch = 11;
+      Tag_Advanced_SIMD_arch = 12;
+      Tag_PCS_config = 13;
+      Tag_ABI_PCS_R9_use = 14;
+      Tag_ABI_PCS_RW_data = 15;
+      Tag_ABI_PCS_RO_data = 16;
+      Tag_ABI_PCS_GOT_use = 17;
+      Tag_ABI_PCS_wchar_t = 18;
+      Tag_ABI_FP_rounding = 19;
+      Tag_ABI_FP_denormal = 20;
+      Tag_ABI_FP_exceptions = 21;
+      Tag_ABI_FP_user_exceptions = 22;
+      Tag_ABI_FP_number_model = 23;
+      Tag_ABI_align_needed = 24;
+      Tag_ABI_align8_preserved = 25;
+      Tag_ABI_enum_size = 26;
+      Tag_ABI_HardFP_use = 27;
+      Tag_ABI_VFP_args = 28;
+      Tag_ABI_WMMX_args = 29;
+      Tag_ABI_optimization_goals = 30;
+      Tag_ABI_FP_optimization_goals = 31;
+      Tag_compatiblity = 32;
+      Tag_CPU_unaligned_access = 34;
+      Tag_FP_HP_extension = 36;
+      Tag_ABI_FP_16bit_format = 38;
+      Tag_MPextension_use = 42;
+      Tag_DIV_use = 44;
+      Tag_nodefaults = 64;
+      Tag_also_compatible_with = 65;
+      Tag_conformance = 67;
+      Tag_T2EE_use = 66;
+      Tag_Virtualization_use = 68;
+
+    { Adds EABI attributes to the al_start assembler list.
+      The inherited implementation is always called first. Only for arm-linux
+      targets whose ABI is abi_eabi or abi_eabihf, this then records:
+        - CPU architecture, profile and name, selected by current_settings.cputype
+          (an unhandled cpu type raises Internalerror(2019100602)),
+        - the FP architecture, selected by current_settings.fputype,
+        - Advanced SIMD, ARM ISA and Thumb ISA usage,
+        - a fixed set of ABI attributes. }
+    class procedure tarmnodeutils.InsertObjectInfo;
+      begin
+        inherited InsertObjectInfo;
+        { write eabi attributes to object file? }
+        if (target_info.system in [system_arm_linux]) and (target_info.abi in [abi_eabihf,abi_eabi]) then
+          begin
+            { CPU architecture / profile / name, one branch per supported cpu type }
+            case current_settings.cputype of
+              cpu_armv3:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,''));
+                end;
+              cpu_armv4:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,1));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'4'));
+                end;
+              cpu_armv4t:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,2));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'4T'));
+                end;
+              cpu_armv5t:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,3));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'5T'));
+                end;
+              cpu_armv5te:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,4));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'5TE'));
+                end;
+              cpu_armv5tej:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,5));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'5TEJ'));
+                end;
+              cpu_armv6:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,6));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'6'));
+                end;
+              cpu_armv6k:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,9));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'6K'));
+                end;
+              cpu_armv6t2:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,8));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'T2'));
+                end;
+              cpu_armv6z:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,7));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'6Z'));
+                end;
+              cpu_armv6m:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,11));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'6-M'));
+                end;
+              cpu_armv7:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,10));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'7'));
+                end;
+              { the non-zero profile values below ($41, $52, $4D) are the ASCII
+                codes of 'A', 'R' and 'M', matching the suffix of the CPU name }
+              cpu_armv7a:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,10));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,$41));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'7-A'));
+                end;
+              cpu_armv7r:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,10));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,$52));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'7-R'));
+                end;
+              cpu_armv7m:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,10));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,$4D));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'7-M'));
+                end;
+              cpu_armv7em:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,13));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,$4D));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'7E-M'));
+                end;
+              else
+                Internalerror(2019100602);
+            end;
+            { FP architecture; fpu types not listed here emit no Tag_FP_Arch
+              attribute because the case has no else branch }
+            case current_settings.fputype of
+              fpu_none,
+              fpu_soft,
+              fpu_libgcc,
+              fpu_fpa,
+              fpu_fpa10,
+              fpu_fpa11:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,0));
+              fpu_vfpv2:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,2));
+              fpu_vfpv3,
+              fpu_neon_vfpv3:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,3));
+              fpu_vfpv3_d16:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,4));
+              fpu_fpv4_sp_d16,
+              fpu_fpv4_s16:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,6));
+              fpu_vfpv4,
+              fpu_neon_vfpv4:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,5));
+              { else not needed anymore PM 2020/04/13
+                Internalerror(2019100603); }
+            end;
+            { Advanced SIMD: 2 if the FPU has FMA, otherwise 1 if it has NEON, otherwise 0 }
+            if FPUARM_HAS_FMA in fpu_capabilities[current_settings.fputype] then
+              current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_Advanced_SIMD_arch,2))
+            else if FPUARM_HAS_NEON in fpu_capabilities[current_settings.fputype] then
+              current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_Advanced_SIMD_arch,1))
+            else
+              current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_Advanced_SIMD_arch,0));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ARM_ISA_use,1));
+            { Thumb ISA use: 2 for CPUs with Thumb-2 capability, 1 otherwise }
+            if CPUARM_HAS_THUMB2 in cpu_capabilities[current_settings.cputype] then
+              current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_THUMB_ISA_use,2))
+            else
+              current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_THUMB_ISA_use,1));
+            { the remaining attributes are written with fixed values for both ABIs }
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_VFP_args,1));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_FP_denormal,1));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_FP_exceptions,1));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_FP_number_model,3));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_align_needed,0));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_align8_preserved,1));
+            { gcc typically writes more like enum size, wchar size, optimization goal, however, this
+              is normally not module global in FPC }
+          end;
+      end;
+
+    { Emits the unit initialization/finalization routines as code.
+      If tf_init_final_units_by_calls is not set in target_info.flags, only the
+      inherited implementation runs. Otherwise two code sequences, labelled
+      FPC_INIT_FUNC_TABLE and FPC_FINALIZE_FUNC_TABLE, are appended to the
+      al_procedures list: each pushes R14, issues a BL to every non-empty
+      init (resp. fini) routine of entries in list order, and pops into R15.
+      The inherited implementation is called afterwards as well. }
+    class procedure tarmnodeutils.insert_init_final_table(entries:tfplist);
+
+      { entry code: push R14, using PUSH for Thumb code and otherwise an STM
+        with the FD postfix on a pre-indexed stack pointer reference }
+      procedure genentry(list : TAsmList);
+        var
+          ref: treference;
+        begin
+          if GenerateThumbCode then
+            list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]))
+          else
+            begin
+              reference_reset(ref,4,[]);
+              ref.index:=NR_STACK_POINTER_REG;
+              ref.addressmode:=AM_PREINDEXED;
+              list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,[RS_R14]),PF_FD));
+            end;
+        end;
+
+      { exit code: pop into R15, using POP for Thumb code and otherwise an LDM
+        with the FD postfix on a pre-indexed stack pointer reference }
+      procedure genexit(list : TAsmList);
+        var
+          ref: treference;
+        begin
+          if GenerateThumbCode then
+            list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R15]))
+          else
+            begin
+              reference_reset(ref,4,[]);
+              ref.index:=NR_STACK_POINTER_REG;
+              ref.addressmode:=AM_PREINDEXED;
+              list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,[RS_R15]),PF_FD));
+            end;
+        end;
+
+      var
+        initList, finalList, header: TAsmList;
+        entry : pinitfinalentry;
+        i : longint;
+      begin
+        if not(tf_init_final_units_by_calls in target_info.flags) then
+          begin
+            inherited insert_init_final_table(entries);
+            exit;
+          end;
+        initList:=TAsmList.create;
+        finalList:=TAsmList.create;
+
+        genentry(finalList);
+        genentry(initList);
+
+        { one BL per routine; entries with an empty name are skipped }
+        for i:=0 to entries.count-1 do
+          begin
+            entry:=pinitfinalentry(entries[i]);
+            if entry^.finifunc<>'' then
+              finalList.Concat(taicpu.op_sym(A_BL,current_asmdata.RefAsmSymbol(entry^.finifunc,AT_FUNCTION)));
+            if entry^.initfunc<>'' then
+              initList.Concat(taicpu.op_sym(A_BL,current_asmdata.RefAsmSymbol(entry^.initfunc,AT_FUNCTION)));
+          end;
+
+        genexit(finalList);
+        genexit(initList);
+
+        { section directive and global symbol are built in a temporary list and
+          inserted in front of the generated code }
+        header:=TAsmList.create;
+        new_section(header, sec_code, 'FPC_INIT_FUNC_TABLE', 1);
+        header.concat(tai_symbol.Createname_global('FPC_INIT_FUNC_TABLE',AT_FUNCTION,0,voidcodepointertype));
+
+        initList.insertList(header);
+        header.free;
+
+        current_asmdata.AsmLists[al_procedures].concatList(initList);
+
+        header:=TAsmList.create;
+        new_section(header, sec_code, 'FPC_FINALIZE_FUNC_TABLE', 1);
+        header.concat(tai_symbol.Createname_global('FPC_FINALIZE_FUNC_TABLE',AT_FUNCTION,0,voidcodepointertype));
+
+        finalList.insertList(header);
+        header.free;
+
+        current_asmdata.AsmLists[al_procedures].concatList(finalList);
+
+        initList.Free;
+        finalList.Free;
+
+        inherited insert_init_final_table(entries);
+      end;
+
+  begin
+    { unit initialization: point cnodeutils at the ARM-specific class }
+    cnodeutils:=tarmnodeutils;
+  end.
+

+ 51 - 0
compiler/arm/tripletcpu.pas

@@ -0,0 +1,51 @@
+{
+    Copyright (c) 2020 by Jonas Maebe
+
+    Construct the cpu part of the triplet
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit tripletcpu;
+
+{$i fpcdefs.inc}
+
+interface
+
+uses
+  globtype;
+
+function tripletcpustr(tripletstyle: ttripletstyle): ansistring;
+
+implementation
+
+uses
+  globals, cutils, systems, cpuinfo;
+
+{ Returns the cpu part of the target triplet: the lower-cased name of the
+  selected cpu type. For LLVM-style triplets the first three characters of
+  that name are replaced by 'thumb' when generating thumb code, and 'be' is
+  appended on big-endian targets. }
+function tripletcpustr(tripletstyle: ttripletstyle): ansistring;
+  var
+    cpuname: ansistring;
+    llvmthumb: boolean;
+  begin
+    cpuname:=lower(cputypestr[current_settings.cputype]);
+    { llvm uses "thumb" in place of the leading three characters of the cpu name }
+    llvmthumb:=(tripletstyle=triplet_llvm) and
+               (current_settings.instructionset=is_thumb);
+    if llvmthumb then
+      cpuname:='thumb'+copy(cpuname,4,255);
+    if target_info.endian=endian_big then
+      cpuname:=cpuname+'be';
+    result:=cpuname;
+  end;
+
+
+end.
+

+ 1027 - 0
compiler/armgen/aoptarm.pas

@@ -0,0 +1,1027 @@
+{
+    Copyright (c) 1998-2020 by Jonas Maebe and Florian Klaempfl, members of the Free Pascal
+    Development Team
+
+    This unit implements an ARM optimizer object used commonly for ARM and AArch64
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+
+Unit aoptarm;
+
+{$i fpcdefs.inc}
+
+{ $define DEBUG_PREREGSCHEDULER}
+{ $define DEBUG_AOPTCPU}
+
+Interface
+
+uses
+  cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
+
+Type
+  { while ARM and AArch64 do not look very similar at first glance,
+    several optimizations can be shared between both }
+  TARMAsmOptimizer = class(TAsmOptimizer)
+    { inserts s as an assembler comment before p when DEBUG_AOPTCPU is defined, otherwise does nothing }
+    procedure DebugMsg(const s : string; p : tai);
+
+    function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
+    function RedundantMovProcess(var p: tai; hp1: tai): boolean;
+    function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
+
+    function OptPass1UXTB(var p: tai): Boolean;
+    function OptPass1UXTH(var p: tai): Boolean;
+    function OptPass1SXTB(var p: tai): Boolean;
+    function OptPass1SXTH(var p: tai): Boolean;
+    function OptPass1And(var p: tai): Boolean;
+  End;
+
+  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
+  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
+{$ifdef AARCH64}
+  function MatchInstruction(const instr: tai; const op: TAsmOps; const postfix: TOpPostfixes): boolean;
+{$endif AARCH64}
+  function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
+
+  function RefsEqual(const r1, r2: treference): boolean;
+
+  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
+  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
+
+Implementation
+
+  uses
+    cutils,verbose,globtype,globals,
+    systems,
+    cpuinfo,
+    cgobj,procinfo,
+    aasmbase,aasmdata;
+
+
+{ Debug helper for the peephole optimizer: with DEBUG_AOPTCPU defined the
+  message s is inserted into the assembler list as a comment in front of p;
+  without it the procedure is an empty inline stub. }
+{$ifdef DEBUG_AOPTCPU}
+  procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);
+    begin
+      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
+    end;
+{$else DEBUG_AOPTCPU}
+  procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
+    begin
+    end;
+{$endif DEBUG_AOPTCPU}
+
+  { True if instr is an instruction whose opcode is in op, whose condition is
+    in cond and whose postfix is in postfix. An empty set matches anything;
+    with a non-empty op, opcodes with an ordinal of 256 or above never match. }
+  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
+    begin
+      result := false;
+      if instr.typ <> ait_instruction then
+        exit;
+      if (op <> []) and
+         not((ord(taicpu(instr).opcode)<256) and (taicpu(instr).opcode in op)) then
+        exit;
+      if (cond <> []) and not(taicpu(instr).condition in cond) then
+        exit;
+      result := (postfix = []) or (taicpu(instr).oppostfix in postfix);
+    end;
+
+
+  { True if instr is an instruction with exactly the opcode op, whose condition
+    is in cond and whose postfix is in postfix; an empty cond or postfix set
+    matches anything. }
+  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
+    begin
+      result := false;
+      if (instr.typ <> ait_instruction) or (taicpu(instr).opcode <> op) then
+        exit;
+      if (cond <> []) and not(taicpu(instr).condition in cond) then
+        exit;
+      result := (postfix = []) or (taicpu(instr).oppostfix in postfix);
+    end;
+
+
+{$ifdef AARCH64}
+  { True if instr is an instruction whose opcode is in op and whose postfix is
+    in postfix; an empty set matches anything. }
+  function MatchInstruction(const instr: tai; const op: TAsmOps; const postfix: TOpPostfixes): boolean;
+    begin
+      result := false;
+      if instr.typ <> ait_instruction then
+        exit;
+      if (op <> []) and not(taicpu(instr).opcode in op) then
+        exit;
+      result := (postfix = []) or (taicpu(instr).oppostfix in postfix);
+    end;
+{$endif AARCH64}
+
+  { True if instr is an instruction with exactly the opcode op and a postfix
+    contained in postfix; an empty postfix set matches anything. }
+  function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
+    begin
+      result := false;
+      if (instr.typ <> ait_instruction) or (taicpu(instr).opcode <> op) then
+        exit;
+      result := (postfix = []) or (taicpu(instr).oppostfix in postfix);
+    end;
+
+
+  { True if oper is a register operand holding exactly reg. }
+  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
+    begin
+      if oper.typ <> top_reg then
+        exit(false);
+      result := oper.reg = reg;
+    end;
+
+
+  { True if both references agree in offset, base, index, scale factor, symbol,
+    refaddr, relsymbol, shift and addressing-mode fields (plus signindex when
+    compiled for ARM) and neither of them has any volatility flag set. }
+  function RefsEqual(const r1, r2: treference): boolean;
+    begin
+      result := false;
+      { a reference with any volatility flag never compares equal }
+      if (r1.volatility<>[]) or (r2.volatility<>[]) then
+        exit;
+      { address components }
+      if (r1.offset <> r2.offset) or
+         (r1.base <> r2.base) or
+         (r1.index <> r2.index) or
+         (r1.scalefactor <> r2.scalefactor) then
+        exit;
+      { symbolic parts }
+      if (r1.symbol <> r2.symbol) or
+         (r1.refaddr <> r2.refaddr) or
+         (r1.relsymbol <> r2.relsymbol) then
+        exit;
+{$ifdef ARM}
+      if r1.signindex <> r2.signindex then
+        exit;
+{$endif ARM}
+      result :=
+        (r1.shiftimm = r2.shiftimm) and
+        (r1.addressmode = r2.addressmode) and
+        (r1.shiftmode = r2.shiftmode);
+    end;
+
+
+  { True if both operands have the same type and the same payload. Only
+    constants, registers, condition codes, real constants and references are
+    compared; any other operand type yields false. }
+  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
+    begin
+      { operands of different kinds can never match }
+      if oper1.typ <> oper2.typ then
+        exit(false);
+
+      case oper1.typ of
+        top_const:
+          Result:=oper1.val = oper2.val;
+        top_reg:
+          Result:=oper1.reg = oper2.reg;
+        top_conditioncode:
+          Result:=oper1.cc = oper2.cc;
+        top_realconst:
+          Result:=oper1.val_real = oper2.val_real;
+        top_ref:
+          Result:=RefsEqual(oper1.ref^, oper2.ref^);
+        else
+          Result:=false;
+      end;
+    end;
+
+
+  { Steps forward from Current with GetNextInstruction and leaves the entry at
+    which the scan stopped in Next; the result is that of the last
+    GetNextInstruction call. The scan stops as soon as
+      - GetNextInstruction fails,
+      - cs_opt_level3 is not enabled (so only a single step is taken),
+      - Next is not an instruction,
+      - RegInInstruction(reg,Next) holds,
+      - Next is a call or jump according to is_calljmp, or
+      - (ARM only) Next modifies the PC register.
+    Callers still have to check what kind of entry Next is. }
+  function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
+    Out Next: tai; reg: TRegister): Boolean;
+    begin
+      Next:=Current;
+      repeat
+        Result:=GetNextInstruction(Next,Next);
+      until not (Result) or
+            not(cs_opt_level3 in current_settings.optimizerswitches) or
+            (Next.typ<>ait_instruction) or
+            RegInInstruction(reg,Next) or
+            is_calljmp(taicpu(Next).opcode)
+{$ifdef ARM}
+            or RegModifiedByInstruction(NR_PC,Next);
+{$endif ARM}
+    end;
+
+
+  { Tries to fold movp into p: if movp is a plain two-operand MOV (same
+    condition as p, no postfix) that copies the first operand register of p
+    into another register, p is changed to write that register directly and
+    movp is removed and freed.
+    The fold only happens when all guards below hold and a deallocation of the
+    register written by p is found after movp. Returns true if movp was removed.
+    optimizer is only used to build the debug message. }
+  function TARMAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
+    var
+      alloc,
+      dealloc : tai_regalloc;
+      hp1 : tai;
+    begin
+      Result:=false;
+      if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
+        { We can't optimize if there is a shiftop }
+        (taicpu(movp).ops=2) and
+        MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
+        { don't mess with moves to fp }
+        (taicpu(movp).oper[0]^.reg<>current_procinfo.framepointer) and
+        { the destination register of the mov must not be used between p and movp }
+        not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
+{$ifdef ARM}
+        { PC should be changed only by moves }
+        (taicpu(movp).oper[0]^.reg<>NR_PC) and
+        { cb[n]z are thumb instructions which require specific registers, with no wide forms }
+        (taicpu(p).opcode<>A_CBZ) and
+        (taicpu(p).opcode<>A_CBNZ) and
+        { There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same }
+        not (
+          (taicpu(p).opcode in [A_MLA, A_MUL]) and
+          (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and
+          (current_settings.cputype < cpu_armv6)
+        ) and
+{$endif ARM}
+        { Take care to only do this for instructions which REALLY load to the first register.
+          Otherwise
+            str reg0, [reg1]
+            mov reg2, reg0
+          will be optimized to
+            str reg2, [reg1]
+        }
+        RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
+        begin
+          { the intermediate register must be released after movp, otherwise it is still needed }
+          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
+          if assigned(dealloc) then
+            begin
+              DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
+              result:=true;
+
+              { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
+                and remove it if possible }
+              asml.Remove(dealloc);
+              alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
+              if assigned(alloc) then
+                begin
+                  asml.Remove(alloc);
+                  alloc.free;
+                  dealloc.free;
+                end
+              else
+                { no matching allocation found: keep the deallocation, moved right after p }
+                asml.InsertAfter(dealloc,p);
+
+              { try to move the allocation of the target register }
+              GetLastInstruction(movp,hp1);
+              alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
+              if assigned(alloc) then
+                begin
+                  asml.Remove(alloc);
+                  asml.InsertBefore(alloc,p);
+                  { adjust used regs }
+                  IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
+                end;
+
+              { finally get rid of the mov }
+              taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
+              { Remove preindexing and postindexing for LDR in some cases.
+                For example:
+                  ldr	reg2,[reg1, xxx]!
+                  mov reg1,reg2
+                must be translated to:
+                  ldr	reg1,[reg1, xxx]
+
+                Preindexing must be removed there, since the same register is used as the base and as the target.
+                Such case is not allowed for ARM CPU and produces crash. }
+              if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref)
+                and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base)
+              then
+                taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;
+              asml.remove(movp);
+              movp.free;
+            end;
+        end;
+    end;
+
+
+  { Folds a register-to-register move p into the following instruction hp1 by
+    replacing uses of the move's destination in hp1 (operand 1 and/or 2) with
+    the move's source register. When a replacement is made, p is removed and
+    freed, p is set to hp1 and the result is true. The caller supplies hp1;
+    this routine does not search for it. }
+  function TARMAsmOptimizer.RedundantMovProcess(var p: tai;hp1: tai):boolean;
+    var
+      I: Integer;
+    begin
+      Result:=false;
+      {
+        change
+        mov r1, r0
+        add r1, r1, #1
+        to
+        add r1, r0, #1
+
+        Todo: Make it work for mov+cmp too
+
+        CAUTION! If this one is successful p might not be a mov instruction anymore!
+      }
+      if (taicpu(p).ops = 2) and
+         (taicpu(p).oper[1]^.typ = top_reg) and
+         (taicpu(p).oppostfix = PF_NONE) and
+
+         MatchInstruction(hp1, [A_ADD, A_ADC,
+{$ifdef ARM}
+                                A_RSB, A_RSC,
+{$endif ARM}
+                                A_SUB, A_SBC,
+                                A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
+                          [taicpu(p).condition], []) and
+         { MOV and MVN might only have 2 ops }
+         (taicpu(hp1).ops >= 2) and
+         { hp1 must write the same register as the move }
+         MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
+         (taicpu(hp1).oper[1]^.typ = top_reg) and
+         (
+           (taicpu(hp1).ops = 2) or
+           (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
+         ) and
+{$ifdef AARCH64}
+         (taicpu(p).oper[1]^.reg<>NR_SP) and
+{$endif AARCH64}
+         { the source register of the move must not be used between p and hp1 }
+         not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+        { When we get here we still don't know if the registers match }
+          for I:=1 to 2 do
+            {
+              If the first loop was successful p will be replaced with hp1.
+              The checks will still be ok, because all required information
+              will also be in hp1 then.
+            }
+            if (taicpu(hp1).ops > I) and
+               MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg)
+{$ifdef ARM}
+               { prevent certain combinations on thumb(2), this is only a safe approximation }
+               and (not(GenerateThumbCode or GenerateThumb2Code) or
+                ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
+                 (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15)))
+{$endif ARM}
+
+               then
+              begin
+                DebugMsg('Peephole RedundantMovProcess done', hp1);
+                taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
+                { p is removed only once; on a second match p already equals hp1 }
+                if p<>hp1 then
+                begin
+                  asml.remove(p);
+                  p.free;
+                  p:=hp1;
+                  Result:=true;
+                end;
+              end;
+        end;
+      end;
+
+
+  function TARMAsmOptimizer.OptPass1UXTB(var p : tai) : Boolean;
+    var
+      hp1, hp2: tai;
+    begin
+      Result:=false;
+      {
+        change
+        uxtb reg2,reg1
+        strb reg2,[...]
+        dealloc reg2
+        to
+        strb reg1,[...]
+      }
+      if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
+        assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
+        { the reference in strb might not use reg2 }
+        not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
+        { reg1 might not be modified inbetween }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole UxtbStrb2Strb done', p);
+          taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
+          GetNextInstruction(p,hp2);
+          asml.remove(p);
+          p.free;
+          p:=hp2;
+          result:=true;
+        end
+      {
+        change
+        uxtb reg2,reg1
+        uxth reg3,reg2
+        dealloc reg2
+        to
+        uxtb reg3,reg1
+      }
+      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
+        (taicpu(hp1).ops = 2) and
+        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+        { reg1 might not be modified inbetween }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole UxtbUxth2Uxtb done', p);
+          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
+          taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
+          asml.remove(hp1);
+          hp1.free;
+          result:=true;
+        end
+      {
+        change
+        uxtb reg2,reg1
+        uxtb reg3,reg2
+        dealloc reg2
+        to
+        uxtb reg3,reg1
+      }
+      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
+        (taicpu(hp1).ops = 2) and
+        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+        { reg1 might not be modified inbetween }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
+          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
+          taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
+          asml.remove(hp1);
+          hp1.free;
+          result:=true;
+        end
+      {
+        change
+        uxtb reg2,reg1
+        and reg3,reg2,#0x*FF
+        dealloc reg2
+        to
+        uxtb reg3,reg1
+      }
+      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
+        (taicpu(hp1).ops=3) and
+        (taicpu(hp1).oper[2]^.typ=top_const) and
+        ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
+        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+        { reg1 might not be modified inbetween }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
+          taicpu(hp1).opcode:=A_UXTB;
+          taicpu(hp1).ops:=2;
+          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+          GetNextInstruction(p,hp2);
+          asml.remove(p);
+          p.free;
+          p:=hp2;
+          result:=true;
+        end
+      else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+        RemoveSuperfluousMove(p, hp1, 'UxtbMov2Data') then
+        Result:=true;
+    end;
+
+
+  function TARMAsmOptimizer.OptPass1UXTH(var p : tai) : Boolean;
+    {
+      Pass 1 peephole rules for the instruction at p (dispatched for UXTH).
+      The rules are tried in order; the first one that matches rewrites the
+      instruction list and the function returns true.  When a rule removes
+      p itself, p is advanced to the instruction that followed it.
+    }
+    var
+      nxt: tai;
+    begin
+      Result:=false;
+
+      { Rule 1 - the extension is redundant in front of a halfword store:
+          uxth reg2,reg1
+          strh reg2,[...]
+          dealloc reg2
+        becomes
+          strh reg1,[...] }
+      if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,nxt,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(nxt, A_STR, [C_None], [PF_H]) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(nxt)) and
+        { reg2 must not take part in the address of the strh }
+        not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(nxt).oper[1]^.ref^)) and
+        { reg1 must be unchanged between the two instructions }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,nxt)) then
+        begin
+          DebugMsg('Peephole UXTHStrh2Strh done', p);
+          taicpu(nxt).loadReg(0,taicpu(p).oper[1]^.reg);
+          { drop the uxth and continue with whatever followed it }
+          GetNextInstruction(p, nxt);
+          asml.remove(p);
+          p.free;
+          p:=nxt;
+          Result:=true;
+          exit;
+        end;
+
+      { Rule 2 - two stacked halfword extensions collapse into one:
+          uxth reg2,reg1
+          uxth reg3,reg2
+          dealloc reg2
+        becomes
+          uxth reg3,reg1 }
+      if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,nxt,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(nxt, A_UXTH, [C_None], [PF_None]) and
+        (taicpu(nxt).ops=2) and
+        MatchOperand(taicpu(nxt).oper[1]^, taicpu(p).oper[0]^.reg) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(nxt)) and
+        { reg1 must be unchanged between the two instructions }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,nxt)) then
+        begin
+          DebugMsg('Peephole UxthUxth2Uxth done', p);
+          { reg1 is now read by the second instruction, keep it allocated }
+          AllocRegBetween(taicpu(p).oper[1]^.reg,p,nxt,UsedRegs);
+          taicpu(nxt).opcode:=A_UXTH;
+          taicpu(nxt).loadReg(1,taicpu(p).oper[1]^.reg);
+          GetNextInstruction(p, nxt);
+          asml.remove(p);
+          p.free;
+          p:=nxt;
+          Result:=true;
+          exit;
+        end;
+
+      { Rule 3 - a mask that keeps at least the low 16 bits adds nothing
+        after a zero extension:
+          uxth reg2,reg1
+          and reg3,reg2,#65535
+          dealloc reg2
+        becomes
+          uxth reg3,reg1 }
+      if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,nxt,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(nxt, A_AND, [C_None], [PF_None]) and
+        (taicpu(nxt).ops=3) and
+        (taicpu(nxt).oper[2]^.typ=top_const) and
+        ((taicpu(nxt).oper[2]^.val and $FFFF)=$FFFF) and
+        MatchOperand(taicpu(nxt).oper[1]^, taicpu(p).oper[0]^.reg) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(nxt)) and
+        { reg1 must be unchanged between the two instructions }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,nxt)) then
+        begin
+          DebugMsg('Peephole UxthAndImm2Uxth done', p);
+          { turn the AND into the extension and make it read reg1 directly }
+          taicpu(nxt).opcode:=A_UXTH;
+          taicpu(nxt).ops:=2;
+          taicpu(nxt).loadReg(1,taicpu(p).oper[1]^.reg);
+          GetNextInstruction(p, nxt);
+          asml.remove(p);
+          p.free;
+          p:=nxt;
+          Result:=true;
+          exit;
+        end;
+
+      { Fallback - let the shared helper try to fold a following move }
+      if GetNextInstructionUsingReg(p, nxt, taicpu(p).oper[0]^.reg) and
+         RemoveSuperfluousMove(p, nxt, 'UxthMov2Data') then
+        Result:=true;
+    end;
+
+
+  function TARMAsmOptimizer.OptPass1SXTB(var p : tai) : Boolean;
+    {
+      Pass 1 peephole rules for the instruction at p (dispatched for SXTB).
+      Rules are tried in order; the first match rewrites the instruction
+      list and yields true.  When p itself is removed, p is advanced to the
+      instruction that followed it.
+    }
+    var
+      hp1, hp2: tai;
+    begin
+      Result:=false;
+      {
+        change
+        sxtb reg2,reg1
+        strb reg2,[...]
+        dealloc reg2
+        to
+        strb reg1,[...]
+      }
+      if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
+        assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
+        { the reference in strb must not use reg2 }
+        not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
+        { reg1 must not be modified inbetween }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole SxtbStrb2Strb done', p);
+          taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
+          GetNextInstruction(p,hp2);
+          asml.remove(p);
+          p.free;
+          p:=hp2;
+          result:=true;
+        end
+      {
+        change
+        sxtb reg2,reg1
+        sxth reg3,reg2
+        dealloc reg2
+        to
+        sxtb reg3,reg1
+      }
+      else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
+        (taicpu(hp1).ops = 2) and
+        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+        { reg1 must not be modified inbetween }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole SxtbSxth2Sxtb done', p);
+          { the first instruction now defines reg3, so extend its live range }
+          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
+          taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
+          asml.remove(hp1);
+          hp1.free;
+          result:=true;
+        end
+      {
+        change
+        sxtb reg2,reg1
+        sxtb reg3,reg2
+        dealloc reg2
+        to
+        sxtb reg3,reg1
+      }
+      else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_SXTB, [C_None], [PF_None]) and
+        (taicpu(hp1).ops = 2) and
+        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+        { reg1 must not be modified inbetween }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole SxtbSxtb2Sxtb done', p);
+          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
+          taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
+          asml.remove(hp1);
+          hp1.free;
+          result:=true;
+        end
+      {
+        change
+        sxtb reg2,reg1
+        and reg3,reg2,#0xFF
+        dealloc reg2
+        to
+        uxtb reg3,reg1
+
+        Masking the sign-extended byte with exactly 0xFF clears every copy
+        of the sign bit, so the pair yields the zero-extended byte.  A mask
+        with further bits set would keep some of the sign copies and is
+        neither a sign nor a zero extension, hence only 0xFF is accepted.
+        NOTE(review): if reg3 can be a 64 bit register on AArch64, confirm
+        that the resulting uxtb operand combination is accepted there.
+      }
+      else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
+        (taicpu(hp1).ops=3) and
+        (taicpu(hp1).oper[2]^.typ=top_const) and
+        (taicpu(hp1).oper[2]^.val=$FF) and
+        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+        { reg1 must not be modified inbetween }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole SxtbAndImm2Uxtb done', p);
+          { reuse the AND as the zero extension reading reg1 directly }
+          taicpu(hp1).opcode:=A_UXTB;
+          taicpu(hp1).ops:=2;
+          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+          GetNextInstruction(p,hp2);
+          asml.remove(p);
+          p.free;
+          p:=hp2;
+          result:=true;
+        end
+      { otherwise let the shared helper try to fold a following move }
+      else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+           RemoveSuperfluousMove(p, hp1, 'SxtbMov2Data') then
+        Result:=true;
+    end;
+
+
+  function TARMAsmOptimizer.OptPass1SXTH(var p : tai) : Boolean;
+    {
+      Pass 1 peephole rules for the instruction at p (dispatched for SXTH).
+      Rules are tried in order; the first match rewrites the instruction
+      list and yields true.  When p itself is removed, p is advanced to the
+      instruction that followed it.
+    }
+    var
+      hp1: tai;
+    begin
+      Result:=false;
+      {
+        change
+        sxth reg2,reg1
+        strh reg2,[...]
+        dealloc reg2
+        to
+        strh reg1,[...]
+      }
+      if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+        { the reference in strh must not use reg2 }
+        not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
+        { reg1 must not be modified inbetween }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole SXTHStrh2Strh done', p);
+          taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
+          { hp1 is reused to hold the successor of the removed sxth }
+          GetNextInstruction(p, hp1);
+          asml.remove(p);
+          p.free;
+          p:=hp1;
+          result:=true;
+        end
+      {
+        change
+        sxth reg2,reg1
+        sxth reg3,reg2
+        dealloc reg2
+        to
+        sxth reg3,reg1
+      }
+      else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
+        (taicpu(hp1).ops=2) and
+        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+        { reg1 must not be modified inbetween }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole SxthSxth2Sxth done', p);
+          { reg1 is now read by the second instruction, keep it allocated }
+          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
+          taicpu(hp1).opcode:=A_SXTH;
+          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+          GetNextInstruction(p, hp1);
+          asml.remove(p);
+          p.free;
+          p:=hp1;
+          result:=true;
+        end
+      {
+        change
+        sxth reg2,reg1
+        and reg3,reg2,#65535
+        dealloc reg2
+        to
+        uxth reg3,reg1
+
+        Masking the sign-extended halfword with exactly 0xFFFF clears every
+        copy of the sign bit, so the pair yields the zero-extended halfword.
+        A mask with further bits set would keep some of the sign copies and
+        is neither a sign nor a zero extension, hence only 0xFFFF is
+        accepted.
+        NOTE(review): if reg3 can be a 64 bit register on AArch64, confirm
+        that the resulting uxth operand combination is accepted there.
+      }
+      else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
+        (taicpu(hp1).ops=3) and
+        (taicpu(hp1).oper[2]^.typ=top_const) and
+        (taicpu(hp1).oper[2]^.val=$FFFF) and
+        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+        { reg1 must not be modified inbetween }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole SxthAndImm2Uxth done', p);
+          { reuse the AND as the zero extension reading reg1 directly }
+          taicpu(hp1).opcode:=A_UXTH;
+          taicpu(hp1).ops:=2;
+          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+          GetNextInstruction(p, hp1);
+          asml.remove(p);
+          p.free;
+          p:=hp1;
+          result:=true;
+        end
+      { otherwise let the shared helper try to fold a following move }
+      else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+           RemoveSuperfluousMove(p, hp1, 'SxthMov2Data') then
+        Result:=true;
+    end;
+
+
+  function TARMAsmOptimizer.OptPass1And(var p : tai) : Boolean;
+    {
+      Pass 1 peephole rules for an AND instruction at p.
+
+      When p has the form "and reg2,reg1,const" the rules below try to merge
+      it with the next instruction using reg2 (a second AND, a byte store,
+      a zero/sign extension, or a shift sequence).  Every successful rule
+      exits with Result=true.  If none applies, the generic "and + mov"
+      folding at the end is attempted.  When p itself is removed, p is
+      advanced to the instruction that followed it.
+    }
+    var
+      hp1, hp2: tai;
+      i: longint;
+    begin
+      Result:=false;
+      {
+        optimize
+        and reg2,reg1,const1
+        ...
+      }
+      if (taicpu(p).ops>2) and
+         (taicpu(p).oper[1]^.typ = top_reg) and
+         (taicpu(p).oper[2]^.typ = top_const) then
+        begin
+          {
+            change
+            and reg2,reg1,const1
+            ...
+            and reg3,reg2,const2
+            to
+            and reg3,reg1,(const1 and const2)
+          }
+          if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+          MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
+          RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+          MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+          (taicpu(hp1).oper[2]^.typ = top_const)
+{$ifdef AARCH64}
+          { the merged mask must be encodable for the register size, or be
+            zero (which is emitted as a mov instead).
+            NOTE(review): the 32 bit case tests R_SUBL - confirm this is the
+            subregister kind carried by 32 bit AArch64 registers }
+          and ((((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBQ) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_64)) or
+               ((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBL) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_32))
+          ) or
+          ((taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0))
+{$endif AARCH64}
+          then
+            begin
+              { variant 1: keep p, retarget it to reg3 and drop hp1;
+                only possible if reg3 is untouched between p and hp1 }
+              if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
+                begin
+                  DebugMsg('Peephole AndAnd2And done', p);
+                  AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
+                  if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
+                    begin
+                      DebugMsg('Peephole AndAnd2Mov0 1 done', p);
+                      taicpu(p).opcode:=A_MOV;
+                      taicpu(p).ops:=2;
+                      taicpu(p).loadConst(1,0);
+                      taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
+                      { hp1 is removed below, so the mov has to define
+                        reg3, the register hp1 used to write; without this
+                        reg3 would be left undefined }
+                      taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
+                    end
+                  else
+                    begin
+                      DebugMsg('Peephole AndAnd2And 1 done', p);
+                      taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
+                      taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
+                      taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
+                    end;
+                  asml.remove(hp1);
+                  hp1.free;
+                  Result:=true;
+                  exit;
+                end
+              { variant 2: keep hp1, make it read reg1 and drop p;
+                only possible if reg1 is untouched between p and hp1 }
+              else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+                begin
+                  if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
+                    begin
+                      DebugMsg('Peephole AndAnd2Mov0 2 done', hp1);
+                      taicpu(hp1).opcode:=A_MOV;
+                      taicpu(hp1).loadConst(1,0);
+                      taicpu(hp1).ops:=2;
+                      taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
+                    end
+                  else
+                    begin
+                      DebugMsg('Peephole AndAnd2And 2 done', hp1);
+                      AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
+                      taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
+                      taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
+                      taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+                    end;
+                  GetNextInstruction(p, hp1);
+                  RemoveCurrentP(p);
+                  p:=hp1;
+                  Result:=true;
+                  exit;
+                end;
+            end
+          {
+            change
+            and reg2,reg1,$xxxxxxFF
+            strb reg2,[...]
+            dealloc reg2
+            to
+            strb reg1,[...]
+          }
+          else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
+            MatchInstruction(p, A_AND, [C_None], [PF_None]) and
+            GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+            MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
+            assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
+            { the reference in strb must not use reg2 }
+            not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
+            { reg1 must not be modified inbetween }
+            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+            begin
+              DebugMsg('Peephole AndStrb2Strb done', p);
+{$ifdef AARCH64}
+              { on AArch64 the stored register is rebuilt with subregister
+                R_SUBD from the super register of reg1 }
+              taicpu(hp1).loadReg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBD));
+{$else AARCH64}
+              taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
+{$endif AARCH64}
+              AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
+              RemoveCurrentP(p);
+              result:=true;
+              exit;
+            end
+          {
+            change
+            and reg2,reg1,255
+            uxtb/uxth reg3,reg2
+            dealloc reg2
+            to
+            and reg3,reg1,x
+
+            (the mask already clears everything above bit 7, so the zero
+            extension cannot change the value)
+          }
+          else if ((taicpu(p).oper[2]^.val and $ffffff00)=0) and
+            MatchInstruction(p, A_AND, [C_None], [PF_None]) and
+            GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+            MatchInstruction(hp1, [A_UXTB,A_UXTH], [C_None], [PF_None]) and
+            (taicpu(hp1).ops = 2) and
+            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+            { reg1 must not be modified inbetween }
+            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+            begin
+              DebugMsg('Peephole AndUxt2And done', p);
+              taicpu(hp1).opcode:=A_AND;
+              taicpu(hp1).ops:=3;
+              taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+              taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
+              GetNextInstruction(p,hp1);
+              asml.remove(p);
+              p.Free;
+              p:=hp1;
+              result:=true;
+              exit;
+            end
+          {
+            change
+            and reg2,reg1,x     (x has no bit above bit 6 set)
+            sxtb/sxth reg3,reg2
+            dealloc reg2
+            to
+            and reg3,reg1,x
+
+            (bit 7 is always clear after the and, so the sign extension
+            cannot change the value)
+          }
+          else if ((taicpu(p).oper[2]^.val and $ffffff80)=0) and
+            MatchInstruction(p, A_AND, [C_None], [PF_None]) and
+            GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+            MatchInstruction(hp1, [A_SXTB,A_SXTH], [C_None], [PF_None]) and
+            (taicpu(hp1).ops = 2) and
+            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+            { reg1 must not be modified inbetween }
+            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+            begin
+              DebugMsg('Peephole AndSxt2And done', p);
+              taicpu(hp1).opcode:=A_AND;
+              taicpu(hp1).ops:=3;
+              taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+              taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
+              GetNextInstruction(p,hp1);
+              asml.remove(p);
+              p.Free;
+              p:=hp1;
+              result:=true;
+              exit;
+            end
+          {
+            from
+            and reg1,reg0,2^n-1
+            mov reg2,reg1, lsl imm1
+            (mov reg3,reg2, lsr/asr imm1)
+            remove either the and or the lsl/xsr sequence if possible
+          }
+
+          { the first test guards the val+1 below against overflow; i
+            receives the n of the 2^n-1 mask }
+          else if (taicpu(p).oper[2]^.val < high(int64)) and
+            cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
+            GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+            MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
+            (taicpu(hp1).ops=3) and
+            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+            (taicpu(hp1).oper[2]^.typ = top_shifterop) and
+{$ifdef ARM}
+            (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
+{$endif ARM}
+            (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
+            RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
+            begin
+              {
+                and reg1,reg0,2^n-1
+                mov reg2,reg1, lsl imm1
+                mov reg3,reg2, lsr/asr imm1
+                =>
+                and reg1,reg0,2^n-1
+                if lsr and 2^n-1>=imm1 or asr and 2^n-1>imm1
+              }
+              if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
+                MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
+                (taicpu(hp2).ops=3) and
+                MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
+                (taicpu(hp2).oper[2]^.typ = top_shifterop) and
+{$ifdef ARM}
+                (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
+{$endif ARM}
+                (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
+                (taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
+                RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
+                ((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
+                ((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
+                 (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
+                begin
+                  DebugMsg('Peephole AndLslXsr2And done', p);
+                  { the and now produces the final result directly }
+                  taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
+                  asml.Remove(hp1);
+                  asml.Remove(hp2);
+                  hp1.free;
+                  hp2.free;
+                  result:=true;
+                  exit;
+                end
+              {
+                and reg1,reg0,2^n-1
+                mov reg2,reg1, lsl imm1
+                =>
+                mov reg2,reg0, lsl imm1
+                if n > 32-imm1, i.e. every bit cleared by the and is
+                shifted out anyway
+              }
+              else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
+                      not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
+                begin
+                  DebugMsg('Peephole AndLsl2Lsl done', p);
+                  taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
+                  GetNextInstruction(p, hp1);
+                  asml.Remove(p);
+                  p.free;
+                  p:=hp1;
+                  result:=true;
+                  exit;
+                end
+            end;
+        end;
+      {
+        change
+        and reg1, ...
+        mov reg2, reg1
+        to
+        and reg2, ...
+      }
+      if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+         (taicpu(p).ops>=3) and
+         RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
+        Result:=true;
+    end;
+
+end.
+

+ 2 - 2
compiler/armgen/armpara.pas

@@ -43,7 +43,7 @@ type
 implementation
 
   uses
-    symconst,symdef,symsym,defutil;
+    symconst,symdef,symsym,symutil,defutil;
 
 
   function tarmgenparamanager.is_hfa(p: tdef; out basedef: tdef): boolean;
@@ -106,7 +106,7 @@ implementation
             for i:=0 to tabstractrecorddef(p).symtable.symlist.count-1 do
               begin
                 sym:=tsym(tabstractrecorddef(p).symtable.symlist[i]);
-                if sym.typ<>fieldvarsym then
+                if not is_normal_fieldvarsym(sym) then
                   continue;
                 if not is_hfa_internal(tfieldvarsym(sym).vardef,basedef,elecount) then
                   exit

+ 160 - 19
compiler/assemble.pas

@@ -264,7 +264,7 @@ Implementation
 {$endif FPC_SOFT_FPUX80}
 {$endif}
       cscript,fmodule,verbose,
-      cpuinfo,
+      cpuinfo,triplet,
       aasmcpu;
 
     var
@@ -745,7 +745,7 @@ Implementation
 {$ifdef hasunix}
         DoPipe:=(cs_asm_pipe in current_settings.globalswitches) and
                 (([cs_asm_extern,cs_asm_leave,cs_link_on_target] * current_settings.globalswitches) = []) and
-                ((asminfo^.id in [as_gas,as_ggas,as_darwin,as_powerpc_xcoff,as_clang,as_solaris_as]));
+                ((asminfo^.id in [as_gas,as_ggas,as_darwin,as_powerpc_xcoff,as_clang_gas,as_clang_llvm,as_solaris_as]));
 {$else hasunix}
         DoPipe:=false;
 {$endif}
@@ -822,16 +822,20 @@ Implementation
       var
         asfound : boolean;
         UtilExe  : string;
+        asmbin : TCmdStr;
       begin
         asfound:=false;
+        asmbin:=asminfo^.asmbin;
+        if (af_llvm in asminfo^.flags) then
+          asmbin:=asmbin+llvmutilssuffix;
         if cs_link_on_target in current_settings.globalswitches then
          begin
            { If linking on target, don't add any path PM }
-           FindAssembler:=utilsprefix+ChangeFileExt(asminfo^.asmbin,target_info.exeext);
+           FindAssembler:=utilsprefix+ChangeFileExt(asmbin,target_info.exeext);
            exit;
          end
         else
-         UtilExe:=utilsprefix+ChangeFileExt(asminfo^.asmbin,source_info.exeext);
+         UtilExe:=utilsprefix+ChangeFileExt(asmbin,source_info.exeext);
         if lastas<>ord(asminfo^.id) then
          begin
            lastas:=ord(asminfo^.id);
@@ -925,17 +929,13 @@ Implementation
 
       begin
         result:=asminfo^.asmcmd;
-        { for Xcode 7.x and later }
-        if MacOSXVersionMin<>'' then
-          Replace(result,'$DARWINVERSION','-mmacosx-version-min='+MacOSXVersionMin)
-        else if iPhoneOSVersionMin<>'' then
-          Replace(result,'$DARWINVERSION','-miphoneos-version-min='+iPhoneOSVersionMin)
-        else
-          Replace(result,'$DARWINVERSION','');
+        if af_llvm in target_asm.flags then
+          Replace(result,'$TRIPLET',targettriplet(triplet_llvm))
 {$ifdef arm}
-        if (target_info.system=system_arm_darwin) then
-          Replace(result,'$ARCH',lower(cputypestr[current_settings.cputype]));
+        else if (target_info.system=system_arm_ios) then
+          Replace(result,'$ARCH',lower(cputypestr[current_settings.cputype]))
 {$endif arm}
+        ;
         if (cs_link_on_target in current_settings.globalswitches) then
          begin
            Replace(result,'$ASM',maybequoted(ScriptFixFileName(AsmFileName)));
@@ -945,7 +945,7 @@ Implementation
          begin
 {$ifdef hasunix}
           if DoPipe then
-            if asminfo^.id<>as_clang then
+            if not(asminfo^.id in [as_clang_gas,as_clang_asdarwin,as_clang_llvm]) then
               Replace(result,'$ASM','')
             else
               Replace(result,'$ASM','-')
@@ -1558,6 +1558,7 @@ Implementation
         objsym,
         objsymend : TObjSymbol;
         cpu: tcputype;
+        eabi_section, TmpSection: TObjSection;
       begin
         while assigned(hp) do
          begin
@@ -1615,9 +1616,11 @@ Implementation
                                     (objsym.objsection<>ObjData.CurrObjSec) then
                                    InternalError(200404124);
                                end
+{$push} {$R-}{$Q-}
                              else
                                Tai_const(hp).value:=objsymend.address-objsym.address+Tai_const(hp).symofs;
                            end;
+{$pop}
                        end;
                    end;
                  ObjData.alloc(tai_const(hp).size);
@@ -1670,7 +1673,10 @@ Implementation
                end;
              ait_section:
                begin
-                 ObjData.CreateSection(Tai_section(hp).sectype,Tai_section(hp).name^,Tai_section(hp).secorder);
+                 if Tai_section(hp).sectype=sec_user then
+                   ObjData.CreateSection(Tai_section(hp).sectype,Tai_section(hp).secflags,Tai_section(hp).secprogbits,Tai_section(hp).name^,Tai_section(hp).secorder)
+                 else
+                   ObjData.CreateSection(Tai_section(hp).sectype,Tai_section(hp).name^,Tai_section(hp).secorder);
                  Tai_section(hp).sec:=ObjData.CurrObjSec;
                end;
              ait_symbol :
@@ -1694,6 +1700,28 @@ Implementation
              ait_cutobject :
                if SmartAsm then
                 break;
+             ait_eabi_attribute :
+               begin
+                 eabi_section:=ObjData.findsection('.ARM.attributes');
+                 if not(assigned(eabi_section)) then
+                   begin
+                     TmpSection:=ObjData.CurrObjSec;
+                     ObjData.CreateSection(sec_arm_attribute,[],SPB_ARM_ATTRIBUTES,'',secorder_default);
+                     eabi_section:=ObjData.CurrObjSec;
+                     ObjData.setsection(TmpSection);
+                   end;
+                 if eabi_section.Size=0 then
+                   eabi_section.alloc(16);
+                 eabi_section.alloc(LengthUleb128(tai_eabi_attribute(hp).tag));
+                 case tai_eabi_attribute(hp).eattr_typ of
+                   eattrtype_dword:
+                     eabi_section.alloc(LengthUleb128(tai_eabi_attribute(hp).value));
+                   eattrtype_ntbs:
+                     eabi_section.alloc(Length(tai_eabi_attribute(hp).valuestr^)+1);
+                   else
+                     Internalerror(2019100701);
+                 end;
+               end;
              else
                ;
            end;
@@ -1708,6 +1736,7 @@ Implementation
         objsym,
         objsymend : TObjSymbol;
         cpu: tcputype;
+        eabi_section: TObjSection;
       begin
         while assigned(hp) do
          begin
@@ -1762,15 +1791,23 @@ Implementation
                    begin
                      objsym:=Objdata.SymbolRef(tai_const(hp).sym);
                      objsymend:=Objdata.SymbolRef(tai_const(hp).endsym);
-                     if objsymend.objsection<>objsym.objsection then
+                     if Tai_const(hp).consttype in [aitconst_gottpoff,aitconst_tlsgd,aitconst_tlsdesc] then
+                       begin
+                         if objsymend.objsection<>ObjData.CurrObjSec then
+                           Internalerror(2019092801);
+                         Tai_const(hp).value:=objsymend.address-ObjData.CurrObjSec.Size+Tai_const(hp).symofs;
+                       end
+                     else if objsymend.objsection<>objsym.objsection then
                        begin
                          if (Tai_const(hp).consttype in [aitconst_uleb128bit,aitconst_sleb128bit]) or
                             (objsym.objsection<>ObjData.CurrObjSec) then
                            internalerror(200905042);
                        end
+{$push} {$R-}{$Q-}
                      else
                        Tai_const(hp).value:=objsymend.address-objsym.address+Tai_const(hp).symofs;
                    end;
+{$pop}
                  if (Tai_const(hp).consttype in [aitconst_uleb128bit,aitconst_sleb128bit]) then
                    Tai_const(hp).fixsize;
                  ObjData.alloc(tai_const(hp).size);
@@ -1843,6 +1880,23 @@ Implementation
                      internalerror(2010011102);
                  end;
                end;
+             ait_eabi_attribute :
+               begin
+                 eabi_section:=ObjData.findsection('.ARM.attributes');
+                 if not(assigned(eabi_section)) then
+                   Internalerror(2019100702);
+                 if eabi_section.Size=0 then
+                   eabi_section.alloc(16);
+                 eabi_section.alloc(LengthUleb128(tai_eabi_attribute(hp).tag));
+                 case tai_eabi_attribute(hp).eattr_typ of
+                   eattrtype_dword:
+                     eabi_section.alloc(LengthUleb128(tai_eabi_attribute(hp).value));
+                   eattrtype_ntbs:
+                     eabi_section.alloc(Length(tai_eabi_attribute(hp).valuestr^)+1);
+                   else
+                     Internalerror(2019100703);
+                 end;
+               end;
              else
                ;
            end;
@@ -1875,6 +1929,10 @@ Implementation
         ccomp : comp;
         tmp    : word;
         cpu: tcputype;
+        ddword : dword;
+        eabi_section: TObjSection;
+        s: String;
+        TmpDataPos: TObjSectionOfs;
       begin
         fillchar(zerobuf,sizeof(zerobuf),0);
         fillchar(objsym,sizeof(objsym),0);
@@ -1981,8 +2039,29 @@ Implementation
                      objsym:=Objdata.SymbolRef(tai_const(hp).sym);
                      objsymend:=Objdata.SymbolRef(tai_const(hp).endsym);
                      relative_reloc:=(objsym.objsection<>objsymend.objsection);
-                     Tai_const(hp).value:=objsymend.address-objsym.address+Tai_const(hp).symofs;
+                     if Tai_const(hp).consttype in [aitconst_gottpoff] then
+                       begin
+                         if objsymend.objsection<>ObjData.CurrObjSec then
+                           Internalerror(2019092802);
+                         Tai_const(hp).value:=objsymend.address-ObjData.CurrObjSec.Size+Tai_const(hp).symofs;
+                       end
+                     else if Tai_const(hp).consttype in [aitconst_tlsgd,aitconst_tlsdesc] then
+                       begin
+                         if objsymend.objsection<>ObjData.CurrObjSec then
+                           Internalerror(2019092802);
+                         Tai_const(hp).value:=ObjData.CurrObjSec.Size-objsymend.address+Tai_const(hp).symofs;
+                       end
+                     else if objsymend.objsection<>objsym.objsection then
+                       begin
+                         if (Tai_const(hp).consttype in [aitconst_uleb128bit,aitconst_sleb128bit]) or
+                            (objsym.objsection<>ObjData.CurrObjSec) then
+                           internalerror(2019010301);
+                       end
+                     else
+{$push} {$R-}{$Q-}
+                       Tai_const(hp).value:=objsymend.address-objsym.address+Tai_const(hp).symofs;
                    end;
+{$pop}
                  case tai_const(hp).consttype of
                    aitconst_64bit,
                    aitconst_32bit,
@@ -2003,7 +2082,7 @@ Implementation
                    aitconst_rva_symbol :
                      begin
                        { PE32+? }
-                       if target_info.system=system_x86_64_win64 then
+                       if target_info.system in systems_peoptplus then
                          ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_RVA)
                        else
                          ObjData.writereloc(Tai_const(hp).symofs,sizeof(pint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_RVA);
@@ -2035,7 +2114,23 @@ Implementation
 {$ifdef arm}
                    aitconst_got:
                      ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_GOT32);
+{                   aitconst_gottpoff:
+                     ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_TPOFF); }
+                   aitconst_tpoff:
+                     ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_TPOFF);
+                   aitconst_tlsgd:
+                     ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_TLSGD);
+                   aitconst_tlsdesc:
+                     begin
+                       { must be a relative symbol, thus value being valid }
+                       if not(assigned(tai_const(hp).sym)) or not(assigned(tai_const(hp).endsym)) then
+                         Internalerror(2019092904);
+                       ObjData.writereloc(Tai_const(hp).value,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_TLSDESC);
+                     end;
 {$endif arm}
+                   aitconst_dtpoff:
+                     { so far, the size of dtpoff is fixed to 4 bytes }
+                     ObjData.writereloc(Tai_const(hp).symofs,4,Objdata.SymbolRef(tai_const(hp).sym),RELOC_DTPOFF);
                    aitconst_gotoff_symbol:
                      ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_GOTOFF);
                    aitconst_uleb128bit,
@@ -2135,6 +2230,52 @@ Implementation
              ait_seh_directive :
                tai_seh_directive(hp).generate_code(objdata);
 {$endif DISABLE_WIN64_SEH}
+             ait_eabi_attribute :
+               begin
+                 eabi_section:=ObjData.findsection('.ARM.attributes');
+                 if not(assigned(eabi_section)) then
+                   Internalerror(2019100704);
+                 if eabi_section.Size=0 then
+                   begin
+                     s:='A';
+                     eabi_section.write(s[1],1);
+                     ddword:=eabi_section.Size-1;
+                     eabi_section.write(ddword,4);
+                     s:='aeabi'#0;
+                     eabi_section.write(s[1],6);
+                     s:=#1;
+                     eabi_section.write(s[1],1);
+                     ddword:=eabi_section.Size-1-4-6-1;
+                     eabi_section.write(ddword,4);
+                   end;
+                 leblen:=EncodeUleb128(tai_eabi_attribute(hp).tag,lebbuf,0);
+                 eabi_section.write(lebbuf,leblen);
+
+                 case tai_eabi_attribute(hp).eattr_typ of
+                   eattrtype_dword:
+                     begin
+                       leblen:=EncodeUleb128(tai_eabi_attribute(hp).value,lebbuf,0);
+                       eabi_section.write(lebbuf,leblen);
+                     end;
+                   eattrtype_ntbs:
+                     begin
+                       s:=tai_eabi_attribute(hp).valuestr^+#0;
+                       eabi_section.write(s[1],Length(s));
+                     end
+                   else
+                     Internalerror(2019100705);
+                 end;
+                 { update size of attributes section, write directly to the dyn. arrays as
+                   we do not increase the size of section }
+                 TmpDataPos:=eabi_section.Data.Pos;
+                 eabi_section.Data.seek(1);
+                 ddword:=eabi_section.Size-1;
+                 eabi_section.Data.write(ddword,4);
+                 eabi_section.Data.seek(12);
+                 ddword:=eabi_section.Size-1-4-6;
+                 eabi_section.Data.write(ddword,4);
+                 eabi_section.Data.Seek(TmpDataPos);
+               end;
              else
                ;
            end;
@@ -2391,7 +2532,7 @@ Implementation
       var
         asmkind: tasm;
       begin
-        for asmkind in [as_gas,as_ggas,as_darwin] do
+        for asmkind in [as_gas,as_ggas,as_darwin,as_clang_gas,as_clang_asdarwin] do
           if assigned(asminfos[asmkind]) and
              (target_info.system in asminfos[asmkind]^.supported_targets) then
             begin

+ 112 - 28
compiler/avr/aasmcpu.pas

@@ -108,7 +108,7 @@ uses
 
     { replaces cond. branches by rjmp/jmp and the inverse cond. branch if needed
       and transforms special instructions to valid instruction encodings }
-    procedure finalizeavrcode(list : TAsmList);
+    function finalizeavrcode(list : TAsmList) : Boolean;
 
 implementation
 
@@ -396,15 +396,42 @@ implementation
       end;
 
 
-    procedure finalizeavrcode(list : TAsmList);
+    function finalizeavrcode(list : TAsmList) : Boolean;
       var
         CurrOffset : longint;
-        curtai : tai;
+        curtai, firstinstruction: tai;
         again : boolean;
         l : tasmlabel;
         inasmblock : Boolean;
+
+      procedure remove_instruction; { turns firstinstruction into an operand-less SLEEP and drops the list entries that follow it, except labels and symbol-end markers }
+        var
+          i: Integer;
+          hp: tai; { successor saved before the current entry is freed }
+        begin
+          taicpu(firstinstruction).opcode:=A_SLEEP; { reuse the existing entry instead of unlinking it }
+          for i:=0 to taicpu(firstinstruction).opercnt-1 do
+            taicpu(firstinstruction).freeop(i); { release every operand of the old instruction }
+          taicpu(firstinstruction).opercnt:=0;
+          taicpu(firstinstruction).ops:=0;
+          firstinstruction:=tai(firstinstruction.Next); { from here on firstinstruction serves as the walking cursor }
+          while assigned(firstinstruction) do
+            begin
+              if firstinstruction.typ in [ait_symbol_end,ait_label] then
+                firstinstruction:=tai(firstinstruction.Next) { labels and symbol-end markers stay in the list }
+              else
+                begin
+                  hp:=tai(firstinstruction.Next); { fetch the successor first, the entry is freed below }
+                  list.Remove(firstinstruction);
+                  firstinstruction.free; { NOTE(review): the freed entries may include the caller's curtai - confirm it is not dereferenced afterwards }
+                  firstinstruction:=hp;
+                end;
+            end; { the loop only ends once firstinstruction is nil }
+        end;
+
       begin
         again:=true;
+        Result:=true;
         while again do
           begin
             again:=false;
@@ -439,39 +466,96 @@ implementation
 
             curtai:=tai(list.first);
             inasmblock:=false;
+            firstinstruction:=nil;
             while assigned(curtai) do
               begin
                 case curtai.typ of
                   ait_instruction:
-                    case taicpu(curtai).opcode of
-                      A_BRxx:
-                        if (taicpu(curtai).oper[0]^.typ=top_ref) and ((taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset>64) or
-                          (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset<-63)) then
+                    begin
+                      if not(assigned(firstinstruction)) then
+                        firstinstruction:=curtai;
+                      case taicpu(curtai).opcode of
+                        A_BRxx:
+                          if (taicpu(curtai).oper[0]^.typ=top_ref) and ((taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset>64) or
+                            (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset<-63)) then
+                            begin
+                              if inasmblock then
+                                Message(asmw_e_brxx_out_of_range)
+                              else
+                                begin
+                                  current_asmdata.getjumplabel(l);
+                                  list.insertafter(tai_label.create(l),curtai);
+                                  if CPUAVR_HAS_JMP_CALL in cpu_capabilities[current_settings.cputype] then
+                                    list.insertafter(taicpu.op_sym(A_JMP,taicpu(curtai).oper[0]^.ref^.symbol),curtai)
+                                  else
+                                    list.insertafter(taicpu.op_sym(A_RJMP,taicpu(curtai).oper[0]^.ref^.symbol),curtai);
+                                  taicpu(curtai).oper[0]^.ref^.symbol:=l;
+                                  taicpu(curtai).condition:=inverse_cond(taicpu(curtai).condition);
+                                  again:=true;
+                                end;
+                            end;
+                        A_JMP:
+                          { replace JMP by RJMP? ...
+                            ... but do not mess with asm block }
+                          if not(inasmblock) and (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset<=2048) and
+                          (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset>=-2047) and
+                          { jmps to function go outside the currently considered scope, so do not mess with them.
+                            Those are generated by the peephole optimizer from call/ret sequences }
+                          not(taicpu(curtai).oper[0]^.ref^.symbol.typ=AT_FUNCTION) then
                           begin
-                            if inasmblock then
-                              Message(asmw_e_brxx_out_of_range)
-                            else
+                            taicpu(curtai).opcode:=A_RJMP;
+                            again:=true;
+                          end;
+                        A_STS:
+                          begin
+                            if current_settings.cputype in [cpu_avrtiny, cpu_avr1] then
+                              with taicpu(curtai).oper[0]^ do
+                                if (ref^.base=NR_NO) and (ref^.index=NR_NO) and (ref^.symbol=nil) and (ref^.offset<$40) then
+                                  begin
+                                    taicpu(curtai).opcode:=A_OUT;
+                                    taicpu(curtai).loadconst(0,ref^.offset);
+                                  end
+                                else if current_settings.cputype=cpu_avr1 then
+                                  begin
+                                    remove_instruction;
+                                    result:=false;
+                                  end;
+                          end;
+                        A_LDS:
+                          begin
+                            if current_settings.cputype in [cpu_avrtiny, cpu_avr1] then
+                              with taicpu(curtai).oper[1]^ do
+                                if (ref^.base=NR_NO) and (ref^.index=NR_NO) and (ref^.symbol=nil) and (ref^.offset<$40) then
+                                  begin
+                                    taicpu(curtai).opcode:=A_IN;
+                                    taicpu(curtai).loadconst(1,ref^.offset)
+                                  end
+                                else if current_settings.cputype=cpu_avr1 then
+                                  begin
+                                    remove_instruction;
+                                    result:=false;
+                                  end;
+                          end;
+                        A_SBIW,
+                        A_MULS,
+                        A_ICALL,
+                        A_IJMP,
+                        A_STD,
+                        A_LD,
+                        A_LDD,
+                        A_ST,
+                        A_ROR,
+                        A_POP,
+                        A_PUSH:
+                          begin
+                            { certain cpu types do not support some instructions, so replace them }
+                            if current_settings.cputype=cpu_avr1 then
                               begin
-                                current_asmdata.getjumplabel(l);
-                                list.insertafter(tai_label.create(l),curtai);
-                                list.insertafter(taicpu.op_sym(A_JMP,taicpu(curtai).oper[0]^.ref^.symbol),curtai);
-                                taicpu(curtai).oper[0]^.ref^.symbol:=l;
-                                taicpu(curtai).condition:=inverse_cond(taicpu(curtai).condition);
-                                again:=true;
+                                remove_instruction;
+                                result:=false;
                               end;
                           end;
-                      A_JMP:
-                        { replace JMP by RJMP? ...
-                          ... but do not mess with asm block }
-                        if not(inasmblock) and (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset<=2048) and
-                        (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset>=-2047) and
-                        { jmps to function go outside the currently considered scope, so do not mess with them.
-                          Those are generated by the peephole optimizer from call/ret sequences }
-                        not(taicpu(curtai).oper[0]^.ref^.symbol.typ=AT_FUNCTION) then
-                        begin
-                          taicpu(curtai).opcode:=A_RJMP;
-                          again:=true;
-                        end;
+                      end;
                     end;
                   ait_marker:
                     case tai_marker(curtai).Kind of

+ 3 - 2
compiler/avr/agavrgas.pas

@@ -122,7 +122,7 @@ unit agavrgas;
               else if assigned(symbol) or (offset<>0) then
                 begin
                   if assigned(symbol) then
-                    s:=ReplaceForbiddenAsmSymbolChars(symbol.name);
+                    s:=ApplyAsmSymbolRestrictions(symbol.name);
 
                   if s='' then
                     s:=tostr(offset)
@@ -165,7 +165,7 @@ unit agavrgas;
             top_ref:
               if o.ref^.refaddr=addr_full then
                 begin
-                  hs:=ReplaceForbiddenAsmSymbolChars(o.ref^.symbol.name);
+                  hs:=ApplyAsmSymbolRestrictions(o.ref^.symbol.name);
                   if o.ref^.offset>0 then
                     hs:=hs+'+'+tostr(o.ref^.offset)
                   else if o.ref^.offset<0 then
@@ -216,6 +216,7 @@ unit agavrgas;
             supported_targets : [system_avr_embedded];
             flags : [af_needar,af_smartlink_sections];
             labelprefix : '.L';
+            labelmaxlen : -1;
             comment : '# ';
             dollarsign: 's';
           );

+ 63 - 24
compiler/avr/aoptcpu.pas

@@ -318,9 +318,9 @@ Implementation
               GetNextInstruction(p, hp1) and
               ((MatchInstruction(hp1, A_CP) and
                 (((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
-                  (taicpu(hp1).oper[1]^.reg = NR_R1)) or
+                  (taicpu(hp1).oper[1]^.reg = GetDefaultZeroReg)) or
                  ((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
-                  (taicpu(hp1).oper[0]^.reg = NR_R1) and
+                  (taicpu(hp1).oper[0]^.reg = GetDefaultZeroReg) and
                   (taicpu(p).opcode in [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_EOR,
                                         A_LSL,A_LSR,
                                         A_OR,A_ORI,A_ROL,A_ROR,A_SUB,A_SBI])))) or
@@ -358,7 +358,7 @@ Implementation
                 // If we compare to the same value we are masking then invert the comparison
                 if (taicpu(hp1).opcode=A_CPI) or
                   { sub/sbc with reverted? }
-                  ((taicpu(hp1).oper[0]^.reg = NR_R1) and (taicpu(p).opcode in [A_SUB,A_SBI])) then
+                  ((taicpu(hp1).oper[0]^.reg = GetDefaultZeroReg) and (taicpu(p).opcode in [A_SUB,A_SBI])) then
                   taicpu(hp2).condition:=inverse_cond(taicpu(hp2).condition);
 
                 asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
@@ -377,14 +377,14 @@ Implementation
                   begin
                     { turn
                       ldi reg0, imm
-                      cp/mov reg1, reg0
+                      <op> reg1, reg0
                       dealloc reg0
                       into
-                      cpi/ldi reg1, imm
+                      <op>i reg1, imm
                     }
                     if MatchOpType(taicpu(p),top_reg,top_const) and
                        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
-                       MatchInstruction(hp1,[A_CP,A_MOV],2) and
+                       MatchInstruction(hp1,[A_CP,A_MOV,A_AND,A_SUB],2) and
                        (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
                        MatchOpType(taicpu(hp1),top_reg,top_reg) and
                        (getsupreg(taicpu(hp1).oper[0]^.reg) in [16..31]) and
@@ -392,6 +392,8 @@ Implementation
                        not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) then
                       begin
                         TransferUsedRegs(TmpUsedRegs);
+                        UpdateUsedRegs(TmpUsedRegs,tai(p.next));
+                        UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
                         if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg, hp1, TmpUsedRegs)) then
                           begin
                             case taicpu(hp1).opcode of
@@ -399,6 +401,10 @@ Implementation
                                 taicpu(hp1).opcode:=A_CPI;
                               A_MOV:
                                 taicpu(hp1).opcode:=A_LDI;
+                              A_AND:
+                                taicpu(hp1).opcode:=A_ANDI;
+                              A_SUB:
+                                taicpu(hp1).opcode:=A_SUBI;
                               else
                                 internalerror(2016111901);
                             end;
@@ -415,7 +421,7 @@ Implementation
                                 dealloc.Free;
                               end;
 
-                            DebugMsg('Peephole LdiMov/Cp2Ldi/Cpi performed', p);
+                            DebugMsg('Peephole LdiOp2Opi performed', p);
 
                             RemoveCurrentP(p);
                           end;
@@ -427,13 +433,20 @@ Implementation
                     (getsupreg(taicpu(p).oper[0]^.ref^.base)=RS_NO) and
                     (getsupreg(taicpu(p).oper[0]^.ref^.index)=RS_NO) and
                     (taicpu(p).oper[0]^.ref^.addressmode=AM_UNCHANGED) and
-                    (taicpu(p).oper[0]^.ref^.offset>=32) and
-                    (taicpu(p).oper[0]^.ref^.offset<=95) then
+                    (((CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype]) and
+                      (taicpu(p).oper[0]^.ref^.offset>=0) and
+                      (taicpu(p).oper[0]^.ref^.offset<=63)) or
+                     (not(CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype]) and
+                      (taicpu(p).oper[0]^.ref^.offset>=32) and
+                      (taicpu(p).oper[0]^.ref^.offset<=95))) then
                     begin
                       DebugMsg('Peephole Sts2Out performed', p);
 
                       taicpu(p).opcode:=A_OUT;
-                      taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset-32);
+                      if CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype] then
+                        taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset)
+                      else
+                        taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset-32);
                     end;
                 A_LDS:
                   if (taicpu(p).oper[1]^.ref^.symbol=nil) and
@@ -441,13 +454,20 @@ Implementation
                     (getsupreg(taicpu(p).oper[1]^.ref^.base)=RS_NO) and
                     (getsupreg(taicpu(p).oper[1]^.ref^.index)=RS_NO) and
                     (taicpu(p).oper[1]^.ref^.addressmode=AM_UNCHANGED) and
-                    (taicpu(p).oper[1]^.ref^.offset>=32) and
-                    (taicpu(p).oper[1]^.ref^.offset<=95) then
+                    (((CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype]) and
+                      (taicpu(p).oper[1]^.ref^.offset>=0) and
+                      (taicpu(p).oper[1]^.ref^.offset<=63)) or
+                     (not(CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype]) and
+                      (taicpu(p).oper[1]^.ref^.offset>=32) and
+                      (taicpu(p).oper[1]^.ref^.offset<=95))) then
                     begin
                       DebugMsg('Peephole Lds2In performed', p);
 
                       taicpu(p).opcode:=A_IN;
-                      taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset-32);
+                      if CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype] then
+                        taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset)
+                      else
+                        taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset-32);
                     end;
                 A_IN:
                     if GetNextInstruction(p,hp1) then
@@ -653,18 +673,19 @@ Implementation
                   end;
                 A_ADD:
                   begin
-                    if (taicpu(p).oper[1]^.reg=NR_R1) and
+                    if (taicpu(p).oper[1]^.reg=GetDefaultZeroReg) and
                     GetNextInstruction(p, hp1) and
                     MatchInstruction(hp1,A_ADC) then
                     begin
                       DebugMsg('Peephole AddAdc2Add performed', p);
 
-                      result:=RemoveCurrentP(p);
+                      RemoveCurrentP(p, hp1);
+                      Result := True;
                     end;
                   end;
                 A_SUB:
                   begin
-                    if (taicpu(p).oper[1]^.reg=NR_R1) and
+                    if (taicpu(p).oper[1]^.reg=GetDefaultZeroReg) and
                     GetNextInstruction(p, hp1) and
                     MatchInstruction(hp1,A_SBC) then
                     begin
@@ -672,7 +693,8 @@ Implementation
 
                       taicpu(hp1).opcode:=A_SUB;
 
-                      result:=RemoveCurrentP(p);
+                      RemoveCurrentP(p, hp1);
+                      Result := True;
                     end;
                   end;
                 A_CLR:
@@ -719,7 +741,7 @@ Implementation
                       begin
                         DebugMsg('Peephole ClrAdc2Adc performed', p);
 
-                        taicpu(hp1).oper[1]^.reg:=NR_R1;
+                        taicpu(hp1).oper[1]^.reg:=GetDefaultZeroReg;
 
                         alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
                         dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
@@ -762,7 +784,8 @@ Implementation
                        GetNextInstruction(hp2,hp3) and
                        MatchInstruction(hp3,A_POP) then
                       begin
-                       if (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
+                       if (CPUAVR_HAS_MOVW in cpu_capabilities[current_settings.cputype]) and
+                         (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
                          ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
                          (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp3).oper[0]^.reg)+1) and
                          ((getsupreg(taicpu(hp3).oper[0]^.reg) mod 2)=0) then
@@ -774,9 +797,23 @@ Implementation
 
                            taicpu(hp3).loadreg(1, taicpu(p).oper[0]^.reg);
 
-                           RemoveCurrentP(p);
-                           RemoveCurrentP(p);
-                           result:=RemoveCurrentP(p);
+                           { We're removing 3 consecutive instructions.  Remove hp1
+                             and hp2 manually instead of calling RemoveCurrentP
+                             as this means we won't be calling UpdateUsedRegs 3 times }
+                           asml.Remove(hp1);
+                           hp1.Free;
+
+                           asml.Remove(hp2);
+                           hp2.Free;
+
+                           { By removing p last, we've guaranteed that p.Next is
+                             valid (storing it prior to removing the instructions
+                             may result in a dangling pointer if hp1 immediately
+                             follows p), and because hp1, hp2 and hp3 came from
+                             sequential calls to GetNextInstruction, it is
+                             guaranteed that UpdateUsedRegs will stop at hp3. [Kit] }
+                           RemoveCurrentP(p, hp3);
+                           Result := True;
                          end
                        else
                          begin
@@ -862,7 +899,8 @@ Implementation
                           not(MatchInstruction(hp1,[A_CALL,A_RCALL])) then
                           begin
                             DebugMsg('Peephole Mov2Nop performed', p);
-                            result:=RemoveCurrentP(p);
+                            RemoveCurrentP(p, hp1);
+                            Result := True;
                             exit;
                           end;
                       end;
@@ -1071,7 +1109,8 @@ Implementation
                         begin
                           DebugMsg('Peephole MovMov2Mov performed', p);
 
-                          result:=RemoveCurrentP(p);
+                          RemoveCurrentP(p,hp1);
+                          Result := True;
 
                           GetNextInstruction(hp1,hp1);
                           if not assigned(hp1) then

+ 8 - 7
compiler/avr/ccpuinnr.inc

@@ -12,10 +12,11 @@
 
  **********************************************************************}
 
-  in_avr_cli = fpc_in_cpu_first,
-  in_avr_sei = fpc_in_cpu_first+1,
-  in_avr_wdr = fpc_in_cpu_first+2,
-  in_avr_sleep = fpc_in_cpu_first+3,
-  in_avr_nop = fpc_in_cpu_first+4,
-  in_avr_save = fpc_in_cpu_first+5,
-  in_avr_restore = fpc_in_cpu_first+6
+  in_avr_cli = in_cpu_first,
+  in_avr_sei = in_cpu_first+1,
+  in_avr_wdr = in_cpu_first+2,
+  in_avr_sleep = in_cpu_first+3,
+  in_avr_nop = in_cpu_first+4,
+  in_avr_save = in_cpu_first+5,
+  in_avr_restore = in_cpu_first+6
+

+ 354 - 215
compiler/avr/cgcpu.pas

@@ -114,6 +114,8 @@ unit cgcpu;
 
         procedure gen_multiply(list: TAsmList; op: topcg; size: TCgSize; src2, src1, dst: tregister; check_overflow: boolean; var ovloc: tlocation);
 
+      private
+       procedure a_op_const_reg_reg_internal(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, srchi, dst, dsthi: tregister);
       protected
         procedure a_op_reg_reg_internal(list: TAsmList; Op: TOpCG; size: TCGSize; src, srchi, dst, dsthi: TRegister);
         procedure a_op_const_reg_internal(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg, reghi: TRegister);
@@ -123,6 +125,7 @@ unit cgcpu;
       tcg64favr = class(tcg64f32)
         procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
         procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
+        procedure a_op64_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; value: int64;src,dst: tregister64);override;
       end;
 
     procedure create_codegen;
@@ -145,10 +148,14 @@ unit cgcpu;
     procedure tcgavr.init_register_allocators;
       begin
         inherited init_register_allocators;
-        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
-            [RS_R18,RS_R19,RS_R20,RS_R21,RS_R22,RS_R23,RS_R24,RS_R25,
-             RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,RS_R9,
-             RS_R10,RS_R11,RS_R12,RS_R13,RS_R14,RS_R15,RS_R16,RS_R17],first_int_imreg,[]);
+        if CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype] then
+          rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
+              [RS_R18,RS_R19,RS_R20,RS_R21,RS_R22,RS_R23,RS_R24,RS_R25],first_int_imreg,[])
+        else
+          rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
+              [RS_R18,RS_R19,RS_R20,RS_R21,RS_R22,RS_R23,RS_R24,RS_R25,
+               RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,RS_R9,
+               RS_R10,RS_R11,RS_R12,RS_R13,RS_R14,RS_R15,RS_R16,RS_R17],first_int_imreg,[]);
       end;
 
 
@@ -199,8 +206,12 @@ unit cgcpu;
                a_load_reg_reg(list,paraloc^.size,paraloc^.size,r,paraloc^.register);
              LOC_REFERENCE,LOC_CREFERENCE:
                begin
-                  reference_reset_base(ref,paraloc^.reference.index,paraloc^.reference.offset,ctempposinvalid,2,[]);
-                  a_load_reg_ref(list,paraloc^.size,paraloc^.size,r,ref);
+                 reference_reset_base(ref,paraloc^.reference.index,paraloc^.reference.offset,ctempposinvalid,2,[]);
+                 if ref.base<>NR_STACK_POINTER_REG then
+                   Internalerror(2020011801);
+
+                 { as AVR allows no stack indirect addressing, anything other than a push makes no sense }
+                 list.concat(taicpu.op_reg(A_PUSH,r));
                end;
              else
                internalerror(2002071004);
@@ -212,91 +223,49 @@ unit cgcpu;
         hp : PCGParaLocation;
 
       begin
-{        if use_push(cgpara) then
-          begin
-            if tcgsize2size[cgpara.Size] > 2 then
-              begin
-                if tcgsize2size[cgpara.Size] <> 4 then
-                  internalerror(2013031101);
-                if cgpara.location^.Next = nil then
-                  begin
-                    if tcgsize2size[cgpara.location^.size] <> 4 then
-                      internalerror(2013031101);
-                  end
-                else
-                  begin
-                    if tcgsize2size[cgpara.location^.size] <> 2 then
-                      internalerror(2013031101);
-                    if tcgsize2size[cgpara.location^.Next^.size] <> 2 then
-                      internalerror(2013031101);
-                    if cgpara.location^.Next^.Next <> nil then
-                      internalerror(2013031101);
-                  end;
+        if not(tcgsize2size[cgpara.Size] in [1..4]) then
+          internalerror(2014011101);
 
-                if tcgsize2size[cgpara.size]>cgpara.alignment then
-                  pushsize:=cgpara.size
-                else
-                  pushsize:=int_cgsize(cgpara.alignment);
-                pushsize2 := int_cgsize(tcgsize2size[pushsize] - 2);
-                list.concat(taicpu.op_reg(A_PUSH,TCgsize2opsize[pushsize2],makeregsize(list,GetNextReg(r),pushsize2)));
-                list.concat(taicpu.op_reg(A_PUSH,S_W,makeregsize(list,r,OS_16)));
-              end
-            else
-              begin
-                cgpara.check_simple_location;
-                if tcgsize2size[cgpara.location^.size]>cgpara.alignment then
-                  pushsize:=cgpara.location^.size
-                else
-                  pushsize:=int_cgsize(cgpara.alignment);
-                list.concat(taicpu.op_reg(A_PUSH,TCgsize2opsize[pushsize],makeregsize(list,r,pushsize)));
-              end;
+        hp:=cgpara.location;
 
-          end
-        else }
+        i:=0;
+        while i<tcgsize2size[cgpara.Size] do
           begin
-            if not(tcgsize2size[cgpara.Size] in [1..4]) then
-              internalerror(2014011101);
+            if not(assigned(hp)) then
+              internalerror(2014011102);
 
-            hp:=cgpara.location;
+            inc(i, tcgsize2size[hp^.Size]);
 
-            i:=0;
-            while i<tcgsize2size[cgpara.Size] do
+            if hp^.Loc=LOC_REGISTER then
               begin
-                if not(assigned(hp)) then
-                  internalerror(2014011102);
-
-                inc(i, tcgsize2size[hp^.Size]);
-
-                if hp^.Loc=LOC_REGISTER then
-                  begin
-                    load_para_loc(r,hp);
-                    hp:=hp^.Next;
-                    { check if we are not in the last iteration to avoid an internalerror in GetNextReg }
-                    if i<tcgsize2size[cgpara.Size] then
-                      r:=GetNextReg(r);
-                  end
-                else
-                  begin
-                    load_para_loc(r,hp);
+                load_para_loc(r,hp);
+                hp:=hp^.Next;
+                { check if we are not in the last iteration to avoid an internalerror in GetNextReg }
+                if i<tcgsize2size[cgpara.Size] then
+                  r:=GetNextReg(r);
+              end
+            else
+              begin
+                load_para_loc(r,hp);
 
-                    if i<tcgsize2size[cgpara.Size] then
-                      for i2:=1 to tcgsize2size[hp^.Size] do
-                        r:=GetNextReg(r);
+                if i<tcgsize2size[cgpara.Size] then
+                  for i2:=1 to tcgsize2size[hp^.Size] do
+                    r:=GetNextReg(r);
 
-                    hp:=hp^.Next;
-                  end;
+                hp:=hp^.Next;
               end;
-            if assigned(hp) then
-              internalerror(2014011103);
           end;
+        if assigned(hp) then
+          internalerror(2014011103);
       end;
 
 
     procedure tcgavr.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);
       var
-        i : longint;
+        i,j : longint;
         hp : PCGParaLocation;
         ref: treference;
+        tmpreg: TRegister;
       begin
         if not(tcgsize2size[paraloc.Size] in [1..4]) then
           internalerror(2014011101);
@@ -322,11 +291,13 @@ unit cgcpu;
                 end;
               LOC_REFERENCE,LOC_CREFERENCE:
                 begin
-                  reference_reset(ref,paraloc.alignment,[]);
-                  ref.base:=hp^.reference.index;
-                  ref.offset:=hp^.reference.offset;
-                  a_load_const_ref(list,hp^.size,a shr (8*(i-1)),ref);
-
+                  for j:=1 to tcgsize2size[hp^.size] do
+                    begin
+                      tmpreg:=getintregister(list,OS_8);
+                      a_load_const_reg(list,OS_8,(a shr (8*(i-1+j-1))) and $ff,tmpreg);
+                      { as AVR allows no stack indirect addressing, anything other than a push makes no sense }
+                      list.concat(taicpu.op_reg(A_PUSH,tmpreg));
+                    end;
                   inc(i,tcgsize2size[hp^.size]);
                   hp:=hp^.Next;
                 end;
@@ -342,6 +313,8 @@ unit cgcpu;
         tmpref, ref: treference;
         location: pcgparalocation;
         sizeleft: tcgint;
+        i: Integer;
+        tmpreg: TRegister;
       begin
         location := paraloc.location;
         tmpref := r;
@@ -354,15 +327,14 @@ unit cgcpu;
                 a_load_ref_reg(list,location^.size,location^.size,tmpref,location^.register);
               LOC_REFERENCE:
                 begin
-                  reference_reset_base(ref,location^.reference.index,location^.reference.offset,ctempposinvalid,paraloc.alignment,[]);
-                  { doubles in softemu mode have a strange order of registers and references }
-                  if location^.size=OS_32 then
-                    g_concatcopy(list,tmpref,ref,4)
-                  else
+                  ref:=tmpref;
+                  for i:=1 to sizeleft do
                     begin
-                      g_concatcopy(list,tmpref,ref,sizeleft);
-                      if assigned(location^.next) then
-                        internalerror(2005010710);
+                      tmpreg:=getintregister(list,OS_8);
+                      a_load_ref_reg(list,OS_8,OS_8,tmpref,tmpreg);
+                      { as AVR allows no stack indirect addressing, anything other than a push makes no sense }
+                      list.concat(taicpu.op_reg(A_PUSH,tmpreg));
+                      inc(tmpref.offset);
                     end;
                 end;
               LOC_VOID:
@@ -438,6 +410,18 @@ unit cgcpu;
 
 
      procedure tcgavr.a_op_const_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister);
+       { Forwards to a_op_const_reg_reg_internal, passing NR_NO for both the
+         source and the destination high register. }
+       begin
+         a_op_const_reg_reg_internal(list,op,size,a,src,NR_NO,dst,NR_NO);
+       end;
+
+
+     procedure tcgavr.a_op_const_reg_reg_internal(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src,srchi,dst,dsthi: tregister);
+       var
+         tmpSrc, tmpDst, countreg: TRegister;
+         b, b2, i, j: byte;
+         s1, s2, t1: integer;
+         l1: TAsmLabel;
+         oldexecutionweight: LongInt;
        begin
          if (op in [OP_MUL,OP_IMUL]) and (size in [OS_16,OS_S16]) and (a in [2,4,8]) then
            begin
@@ -451,6 +435,105 @@ unit cgcpu;
                  a:=a shr 1;
                end;
            end
+
+         else if (op in [OP_SHL,OP_SHR]) and
+           { a=0 gets eliminated later by tcg.optimize_op_const }
+           (a>0)  then
+           begin
+             { Constant shift, done in three steps:
+                 1. move whole bytes from src to dst (b bytes of offset),
+                 2. shift the moved bytes by the remaining b2 bits,
+                 3. zero the b destination bytes that received no source byte. }
+
+             { A shift count of at least the operand width leaves no source
+               bits in the result, so every byte is treated as skipped and no
+               bit shifts are emitted. Testing a itself (instead of the
+               narrowed byte count) also covers counts in
+               8*size..8*size+7, which would otherwise reach the bit-shift
+               code with zero bytes left to shift (s2=0, division by zero in
+               the cost test, register offset past the operand). }
+             if a>=8*tcgsize2size[size] then
+               begin
+                 b:=tcgsize2size[size];
+                 b2:=0;
+               end
+             else
+               begin
+                 { number of whole bytes to shift }
+                 b:=a div 8;
+                 { number of remaining bits to shift }
+                 b2:=a mod 8;
+               end;
+
+             if b < tcgsize2size[size] then
+               begin
+                 { copy from src to dst accounting for shift offset;
+                   the copy order is chosen so that no source byte is
+                   overwritten before it is read if src and dst are the
+                   same register }
+                 if op=OP_SHL then
+                   begin
+                     { bytes move towards higher offsets: start at the top }
+                     for i:=tcgsize2size[size]-b-1 downto 0 do
+                       a_load_reg_reg(list,OS_8,OS_8,
+                         GetOffsetReg64(src,srchi,i),
+                         GetOffsetReg64(dst,dsthi,i+b));
+                   end
+                 else
+                   begin
+                     { bytes move towards lower offsets: start at the bottom }
+                     for i:=0 to (tcgsize2size[size]-b-1) do
+                       a_load_reg_reg(list,OS_8,OS_8,
+                         GetOffsetReg64(src,srchi,i+b),
+                         GetOffsetReg64(dst,dsthi,i));
+                   end;
+               end;
+
+             { remaining bit shifts }
+             if b2 > 0 then
+               begin
+                 { Cost of loop }
+                 s1:=3+tcgsize2size[size]-b;
+                 t1:=b2*(tcgsize2size[size]-b+3);
+                 { Cost of loop unrolling,t2=s2 }
+                 s2:=b2*(tcgsize2size[size]-b);
+
+                 if ((cs_opt_size in current_settings.optimizerswitches) and (s1<s2)) or
+                    (((s2-s1)-t1/s2)>0) then
+                   begin
+                     { Shift non-moved bytes in loop }
+                     current_asmdata.getjumplabel(l1);
+                     countreg:=getintregister(list,OS_8);
+                     a_load_const_reg(list,OS_8,b2,countreg);
+                     cg.a_label(list,l1);
+                     oldexecutionweight:=executionweight;
+                     executionweight:=executionweight*b2;
+                     if op=OP_SHL then
+                       list.concat(taicpu.op_reg(A_LSL,GetOffsetReg64(dst,dsthi,b)))
+                     else
+                       list.concat(taicpu.op_reg(A_LSR,GetOffsetReg64(dst,dsthi,tcgsize2size[size]-1-b)));
+
+                     if size in [OS_S16,OS_16,OS_S32,OS_32,OS_S64,OS_64] then
+                       begin
+                         for i:=2+b to tcgsize2size[size] do
+                           if op=OP_SHL then
+                             list.concat(taicpu.op_reg(A_ROL,GetOffsetReg64(dst,dsthi,i-1)))
+                           else
+                             list.concat(taicpu.op_reg(A_ROR,GetOffsetReg64(dst,dsthi,tcgsize2size[size]-i)));
+                       end;
+                     list.concat(taicpu.op_reg(A_DEC,countreg));
+                     a_jmp_flags(list,F_NE,l1);
+                     executionweight:=oldexecutionweight;
+                     { keep registers alive }
+                     a_reg_sync(list,countreg);
+                   end
+                 else
+                   begin
+                     { Unroll shift loop over non-moved bytes }
+                     for j:=1 to b2 do
+                     begin
+                       if op=OP_SHL then
+                         list.concat(taicpu.op_reg(A_LSL,
+                         GetOffsetReg64(dst,dsthi,b)))
+                       else
+                         list.concat(taicpu.op_reg(A_LSR,
+                           GetOffsetReg64(dst,dsthi,tcgsize2size[size]-b-1)));
+
+                       if not(size in [OS_8,OS_S8]) then
+                         for i:=2 to tcgsize2size[size]-b do
+                           if op=OP_SHL then
+                             list.concat(taicpu.op_reg(A_ROL,
+                               GetOffsetReg64(dst,dsthi,b+i-1)))
+                           else
+                             list.concat(taicpu.op_reg(A_ROR,
+                               GetOffsetReg64(dst,dsthi,tcgsize2size[size]-b-i)));
+                     end;
+                   end;
+               end;
+
+               { fill skipped destination registers with 0.
+                 Done last, so the optimizer can optimize the register moves }
+               for i:=1 to b do
+                 if op=OP_SHL then
+                   emit_mov(list,GetOffsetReg64(dst,dsthi,i-1),GetDefaultZeroReg)
+                 else
+                   emit_mov(list,GetOffsetReg64(dst,dsthi,tcgsize2size[size]-i),GetDefaultZeroReg);
+           end
          else
            inherited a_op_const_reg_reg(list,op,size,a,src,dst);
        end;
@@ -647,7 +730,7 @@ unit cgcpu;
                      if not(size in [OS_8,OS_S8]) then
                        begin
                          list.concat(taicpu.op_none(A_CLC));
-                         list.concat(taicpu.op_reg_const(A_SBRC,src,0));
+                         list.concat(taicpu.op_reg_const(A_SBRC,dst,0));
                          list.concat(taicpu.op_none(A_SEC));
                        end;
                      list.concat(taicpu.op_reg(A_ROR,GetOffsetReg64(dst,dsthi,tcgsize2size[size]-1)));
@@ -687,8 +770,8 @@ unit cgcpu;
 
                list.concat(taicpu.op_reg(A_DEC,countreg));
                a_jmp_flags(list,F_NE,l1);
-               // keep registers alive
-               list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
+               { keep registers alive }
+               a_reg_sync(list,countreg);
                cg.a_label(list,l2);
              end;
 
@@ -771,9 +854,14 @@ unit cgcpu;
                for i:=1 to tcgsize2size[size] do
                  begin
                    if ((qword(a) and mask) shr shift)=0 then
-                     list.concat(taicpu.op_reg_reg(A_MOV,reg,NR_R1))
+                     list.concat(taicpu.op_reg_reg(A_MOV,reg,GetDefaultZeroReg))
                    else if ((qword(a) and mask) shr shift)<>$ff then
-                     list.concat(taicpu.op_reg_const(A_ANDI,reg,(qword(a) and mask) shr shift));
+                     begin
+                       getcpuregister(list,NR_R26);
+                       list.concat(taicpu.op_reg_const(A_LDI,NR_R26,(qword(a) and mask) shr shift));
+                       list.concat(taicpu.op_reg_reg(A_AND,reg,NR_R26));
+                       ungetcpuregister(list,NR_R26);
+                     end;
                    { check if we are not in the last iteration to avoid an internalerror in GetNextReg }
                    if i<tcgsize2size[size] then
                      NextRegPostInc;
@@ -786,7 +874,12 @@ unit cgcpu;
                if ((a and mask)=1) and (tcgsize2size[size]=1) then
                  list.concat(taicpu.op_reg(A_DEC,reg))
                else
-                 list.concat(taicpu.op_reg_const(A_SUBI,reg,a and mask));
+                 begin
+                   getcpuregister(list,NR_R26);
+                   list.concat(taicpu.op_reg_const(A_LDI,NR_R26,a and mask));
+                   list.concat(taicpu.op_reg_reg(A_SUB,reg,NR_R26));
+                   ungetcpuregister(list,NR_R26);
+                 end;
                if size in [OS_S16,OS_16,OS_S32,OS_32,OS_S64,OS_64] then
                  begin
                    for i:=2 to tcgsize2size[size] do
@@ -798,7 +891,7 @@ unit cgcpu;
                        { decrease pressure on upper half of registers by using SBC with the
                          default zero register instead of SBCI ...,0 }
                        if curvalue=0 then
-                         list.concat(taicpu.op_reg_reg(A_SBC,reg,NR_R1))
+                         list.concat(taicpu.op_reg_reg(A_SBC,reg,GetDefaultZeroReg))
                        else
                          list.concat(taicpu.op_reg_const(A_SBCI,reg,curvalue));
                      end;
@@ -895,7 +988,7 @@ unit cgcpu;
              begin
                curvalue:=a and mask;
                if curvalue=0 then
-                 list.concat(taicpu.op_reg_reg(A_ADD,reg,NR_R1))
+                 list.concat(taicpu.op_reg_reg(A_ADD,reg,GetDefaultZeroReg))
                else if (curvalue=1) and (tcgsize2size[size]=1) then
                  list.concat(taicpu.op_reg(A_INC,reg))
                else
@@ -915,7 +1008,7 @@ unit cgcpu;
                        { decrease pressure on upper half of registers by using ADC with the
                          default zero register instead of ADD ...,0 }
                        if curvalue=0 then
-                         list.concat(taicpu.op_reg_reg(A_ADC,reg,NR_R1))
+                         list.concat(taicpu.op_reg_reg(A_ADC,reg,GetDefaultZeroReg))
                        else
                          begin
                            tmpreg:=getintregister(list,OS_8);
@@ -943,7 +1036,7 @@ unit cgcpu;
                      tmpreg:=reg;
                      for i:=1 to 4 do
                        begin
-                         list.concat(taicpu.op_reg_reg(A_MOV,tmpreg,NR_R1));
+                         list.concat(taicpu.op_reg_reg(A_MOV,tmpreg,GetDefaultZeroReg));
                          tmpreg:=GetNextReg(tmpreg);
                        end;
                    end
@@ -971,7 +1064,7 @@ unit cgcpu;
          for i:=1 to tcgsize2size[size] do
            begin
              if ((qword(a) and mask) shr shift)=0 then
-               emit_mov(list,reg,NR_R1)
+               emit_mov(list,reg,GetDefaultZeroReg)
              else
                begin
                  getcpuregister(list,NR_R26);
@@ -998,7 +1091,6 @@ unit cgcpu;
 
 
     function tcgavr.normalize_ref(list:TAsmList;ref: treference;tmpreg : tregister) : treference;
-
       var
         tmpref : treference;
         l : tasmlabel;
@@ -1011,9 +1103,6 @@ unit cgcpu;
         { Be sure to have a base register }
         if (ref.base=NR_NO) then
           begin
-            { only symbol+offset? }
-            if ref.index=NR_NO then
-              exit;
             ref.base:=ref.index;
             ref.index:=NR_NO;
           end;
@@ -1101,7 +1190,9 @@ unit cgcpu;
             emit_mov(list,GetNextReg(tmpreg),GetNextReg(ref.index));
             ref.base:=tmpreg;
             ref.index:=NR_NO;
-          end;
+          end
+        else
+          Internalerror(2020011901);
         Result:=ref;
       end;
 
@@ -1131,7 +1222,8 @@ unit cgcpu;
              if not((href.addressmode=AM_UNCHANGED) and
                     (href.symbol=nil) and
                      (href.Index=NR_NO) and
-                     (href.Offset in [0..64-tcgsize2size[fromsize]])) then
+                     (href.Offset in [0..64-tcgsize2size[fromsize]])) or
+                (CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype]) then
                begin
                  href:=normalize_ref(list,href,NR_R30);
                  getcpuregister(list,NR_R30);
@@ -1181,7 +1273,7 @@ unit cgcpu;
                        else
                          href.addressmode:=AM_UNCHANGED;
 
-                       list.concat(taicpu.op_ref_reg(GetStore(href),href,NR_R1));
+                       list.concat(taicpu.op_ref_reg(GetStore(href),href,GetDefaultZeroReg));
                      end;
                  end;
                OS_S8:
@@ -1193,7 +1285,7 @@ unit cgcpu;
                    if tcgsize2size[tosize]>1 then
                      begin
                        tmpreg:=getintregister(list,OS_8);
-                       emit_mov(list,tmpreg,NR_R1);
+                       emit_mov(list,tmpreg,GetDefaultZeroReg);
                        list.concat(taicpu.op_reg_const(A_SBRC,reg,7));
                        list.concat(taicpu.op_reg(A_COM,tmpreg));
                        for i:=2 to tcgsize2size[tosize] do
@@ -1235,7 +1327,7 @@ unit cgcpu;
                        else
                          href.addressmode:=AM_UNCHANGED;
 
-                       list.concat(taicpu.op_ref_reg(GetStore(href),href,NR_R1));
+                       list.concat(taicpu.op_ref_reg(GetStore(href),href,GetDefaultZeroReg));
                      end;
                  end;
                OS_S16:
@@ -1257,7 +1349,7 @@ unit cgcpu;
                    if tcgsize2size[tosize]>2 then
                      begin
                        tmpreg:=getintregister(list,OS_8);
-                       emit_mov(list,tmpreg,NR_R1);
+                       emit_mov(list,tmpreg,GetDefaultZeroReg);
                        list.concat(taicpu.op_reg_const(A_SBRC,reg,7));
                        list.concat(taicpu.op_reg(A_COM,tmpreg));
                        for i:=3 to tcgsize2size[tosize] do
@@ -1283,8 +1375,11 @@ unit cgcpu;
              // Write to 16 bit ioreg, first high byte then low byte
              // sequence required for 16 bit timer registers
              // See e.g. atmega328p manual para 15.3 Accessing 16 bit registers
-             if (fromsize in [OS_16, OS_S16]) and QuickRef and (href.offset > 31)
-               and (href.offset < cpuinfo.embedded_controllers[current_settings.controllertype].srambase) then
+             // Avrxmega3: write low byte first then high byte
+             // See e.g. megaAVR-0 family data sheet 7.5.6 Accessing 16-bit registers
+             if (current_settings.cputype <> cpu_avrxmega3) and
+               (fromsize in [OS_16, OS_S16]) and QuickRef and (href.offset > 31) and
+               (href.offset < cpuinfo.embedded_controllers[current_settings.controllertype].srambase) then
                begin
                  tmpreg:=GetNextReg(reg);
                  href.addressmode:=AM_UNCHANGED;
@@ -1348,7 +1443,8 @@ unit cgcpu;
              if not((href.addressmode=AM_UNCHANGED) and
                     (href.symbol=nil) and
                      (href.Index=NR_NO) and
-                     (href.Offset in [0..64-tcgsize2size[fromsize]])) then
+                     (href.Offset in [0..64-tcgsize2size[fromsize]])) or
+                (CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype]) then
                begin
                  href:=normalize_ref(list,href,NR_R30);
                  getcpuregister(list,NR_R30);
@@ -1388,7 +1484,7 @@ unit cgcpu;
                    for i:=2 to tcgsize2size[tosize] do
                      begin
                        reg:=GetNextReg(reg);
-                       emit_mov(list,reg,NR_R1);
+                       emit_mov(list,reg,GetDefaultZeroReg);
                      end;
                  end;
                OS_S8:
@@ -1399,7 +1495,7 @@ unit cgcpu;
                    if tcgsize2size[tosize]>1 then
                      begin
                        reg:=GetNextReg(reg);
-                       emit_mov(list,reg,NR_R1);
+                       emit_mov(list,reg,GetDefaultZeroReg);
                        list.concat(taicpu.op_reg_const(A_SBRC,tmpreg,7));
                        list.concat(taicpu.op_reg(A_COM,reg));
                        tmpreg:=reg;
@@ -1426,7 +1522,7 @@ unit cgcpu;
                    for i:=3 to tcgsize2size[tosize] do
                      begin
                        reg:=GetNextReg(reg);
-                       emit_mov(list,reg,NR_R1);
+                       emit_mov(list,reg,GetDefaultZeroReg);
                      end;
                  end;
                OS_S16:
@@ -1443,7 +1539,7 @@ unit cgcpu;
                    tmpreg:=reg;
 
                    reg:=GetNextReg(reg);
-                   emit_mov(list,reg,NR_R1);
+                   emit_mov(list,reg,GetDefaultZeroReg);
                    list.concat(taicpu.op_reg_const(A_SBRC,tmpreg,7));
                    list.concat(taicpu.op_reg(A_COM,reg));
                    tmpreg:=reg;
@@ -1507,7 +1603,7 @@ unit cgcpu;
                    for i:=2 to tcgsize2size[tosize] do
                      begin
                        reg2:=GetNextReg(reg2);
-                       emit_mov(list,reg2,NR_R1);
+                       emit_mov(list,reg2,GetDefaultZeroReg);
                      end;
                  end;
                OS_S8:
@@ -1517,7 +1613,7 @@ unit cgcpu;
                    if tcgsize2size[tosize]>1 then
                      begin
                        reg2:=GetNextReg(reg2);
-                       emit_mov(list,reg2,NR_R1);
+                       emit_mov(list,reg2,GetDefaultZeroReg);
                        list.concat(taicpu.op_reg_const(A_SBRC,reg1,7));
                        list.concat(taicpu.op_reg(A_COM,reg2));
                        tmpreg:=reg2;
@@ -1539,7 +1635,7 @@ unit cgcpu;
                    for i:=3 to tcgsize2size[tosize] do
                      begin
                        reg2:=GetNextReg(reg2);
-                       emit_mov(list,reg2,NR_R1);
+                       emit_mov(list,reg2,GetDefaultZeroReg);
                      end;
                  end;
                OS_S16:
@@ -1553,7 +1649,7 @@ unit cgcpu;
                    if tcgsize2size[tosize]>2 then
                      begin
                        reg2:=GetNextReg(reg2);
-                       emit_mov(list,reg2,NR_R1);
+                       emit_mov(list,reg2,GetDefaultZeroReg);
                        list.concat(taicpu.op_reg_const(A_SBRC,reg1,7));
                        list.concat(taicpu.op_reg(A_COM,reg2));
                        tmpreg:=reg2;
@@ -1645,22 +1741,22 @@ unit cgcpu;
                 for i:=2 to tcgsize2size[size] do
                   reg:=GetNextReg(reg);
 
-                list.concat(taicpu.op_reg_reg(A_CP,reg,NR_R1));
+                list.concat(taicpu.op_reg_reg(A_CP,reg,GetDefaultZeroReg));
               end
             else
               begin
                 if swapped then
-                  list.concat(taicpu.op_reg_reg(A_CP,NR_R1,reg))
+                  list.concat(taicpu.op_reg_reg(A_CP,GetDefaultZeroReg,reg))
                 else
-                  list.concat(taicpu.op_reg_reg(A_CP,reg,NR_R1));
+                  list.concat(taicpu.op_reg_reg(A_CP,reg,GetDefaultZeroReg));
 
                 for i:=2 to tcgsize2size[size] do
                   begin
                     reg:=GetNextReg(reg);
                     if swapped then
-                      list.concat(taicpu.op_reg_reg(A_CPC,NR_R1,reg))
+                      list.concat(taicpu.op_reg_reg(A_CPC,GetDefaultZeroReg,reg))
                     else
-                      list.concat(taicpu.op_reg_reg(A_CPC,reg,NR_R1));
+                      list.concat(taicpu.op_reg_reg(A_CPC,reg,GetDefaultZeroReg));
                   end;
               end;
 
@@ -1761,6 +1857,8 @@ unit cgcpu;
       var
         l : TAsmLabel;
         tmpflags : TResFlags;
+        i: Integer;
+        hreg: TRegister;
       begin
         current_asmdata.getjumplabel(l);
         {
@@ -1768,7 +1866,7 @@ unit cgcpu;
           begin
             tmpflags:=f;
             inverse_flags(tmpflags);
-            emit_mov(reg,NR_R1);
+            emit_mov(reg,GetDefaultZeroReg);
             a_jmp_flags(list,tmpflags,l);
             list.concat(taicpu.op_reg_const(A_LDI,reg,1));
           end
@@ -1776,8 +1874,15 @@ unit cgcpu;
         }
           begin
             list.concat(taicpu.op_reg_const(A_LDI,reg,1));
+            hreg:=reg;
+            for i:=2 to tcgsize2size[size] do
+              begin
+                hreg:=GetNextReg(hreg);
+                emit_mov(list,hreg,GetDefaultZeroReg);
+              end;
+
             a_jmp_flags(list,f,l);
-            emit_mov(list,reg,NR_R1);
+            emit_mov(list,reg,GetDefaultZeroReg);
           end;
         cg.a_label(list,l);
       end;
@@ -1793,21 +1898,21 @@ unit cgcpu;
           {-14..-1:
             begin
               if ((-value) mod 2)<>0 then
-                list.concat(taicpu.op_reg(A_PUSH,NR_R0));
+                list.concat(taicpu.op_reg(A_PUSH,GetDefaultTmpReg));
               for i:=1 to (-value) div 2 do
                 list.concat(taicpu.op_const(A_RCALL,0));
             end;
           1..7:
             begin
               for i:=1 to value do
-                list.concat(taicpu.op_reg(A_POP,NR_R0));
+                list.concat(taicpu.op_reg(A_POP,GetDefaultTmpReg));
             end;}
           else
             begin
               list.concat(taicpu.op_reg_const(A_SUBI,NR_R28,lo(word(-value))));
               list.concat(taicpu.op_reg_const(A_SBCI,NR_R29,hi(word(-value))));
               // get SREG
-              list.concat(taicpu.op_reg_const(A_IN,NR_R0,NIO_SREG));
+              list.concat(taicpu.op_reg_const(A_IN,GetDefaultTmpReg,NIO_SREG));
 
               // block interrupts
               list.concat(taicpu.op_none(A_CLI));
@@ -1816,7 +1921,7 @@ unit cgcpu;
               list.concat(taicpu.op_const_reg(A_OUT,NIO_SP_HI,NR_R29));
 
               // release interrupts
-              list.concat(taicpu.op_const_reg(A_OUT,NIO_SREG,NR_R0));
+              list.concat(taicpu.op_const_reg(A_OUT,NIO_SREG,GetDefaultTmpReg));
 
               // write low SP
               list.concat(taicpu.op_const_reg(A_OUT,NIO_SP_LO,NR_R28));
@@ -1968,8 +2073,8 @@ unit cgcpu;
                 pd:=search_system_proc(name);
                 paraloc1.init;
                 paraloc2.init;
-                paramanager.getintparaloc(list,pd,1,paraloc1);
-                paramanager.getintparaloc(list,pd,2,paraloc2);
+                paramanager.getcgtempparaloc(list,pd,1,paraloc1);
+                paramanager.getcgtempparaloc(list,pd,2,paraloc2);
                 a_load_reg_cgpara(list,OS_8,src1,paraloc2);
                 a_load_reg_cgpara(list,OS_8,src2,paraloc1);
                 paramanager.freecgpara(list,paraloc2);
@@ -2065,8 +2170,8 @@ unit cgcpu;
                 pd:=search_system_proc(name);
                 paraloc1.init;
                 paraloc2.init;
-                paramanager.getintparaloc(list,pd,1,paraloc1);
-                paramanager.getintparaloc(list,pd,2,paraloc2);
+                paramanager.getcgtempparaloc(list,pd,1,paraloc1);
+                paramanager.getcgtempparaloc(list,pd,2,paraloc2);
                 a_load_reg_cgpara(list,OS_16,src1,paraloc2);
                 a_load_reg_cgpara(list,OS_16,src2,paraloc1);
                 paramanager.freecgpara(list,paraloc2);
@@ -2120,21 +2225,26 @@ unit cgcpu;
               regs:=regs+[RS_R28,RS_R29];
 
             { we clear the default zero register }
-            include(regs,RS_R1);
+            include(regs,getsupreg(GetDefaultZeroReg));
 
-            regs:=regs+[RS_R0];
+            regs:=regs+[getsupreg(GetDefaultTmpReg)];
 
-            for reg:=RS_R31 downto RS_R0 do
-              if reg in regs then
-                list.concat(taicpu.op_reg(A_PUSH,newreg(R_INTREGISTER,reg,R_SUBWHOLE)));
+            if current_settings.cputype=cpu_avr1 then
+              message1(cg_w_interrupt_does_not_save_registers,current_procinfo.procdef.fullprocname(false))
+            else
+              begin
+                for reg:=RS_R31 downto RS_R0 do
+                  if reg in regs then
+                    list.concat(taicpu.op_reg(A_PUSH,newreg(R_INTREGISTER,reg,R_SUBWHOLE)));
+                { Save SREG }
+                cg.getcpuregister(list,GetDefaultTmpReg);
+                list.concat(taicpu.op_reg_const(A_IN, GetDefaultTmpReg, $3F));
+                list.concat(taicpu.op_reg(A_PUSH, GetDefaultTmpReg));
+                cg.ungetcpuregister(list,GetDefaultTmpReg);
+              end;
 
-            { Save SREG }
-            cg.getcpuregister(list,NR_R0);
-            list.concat(taicpu.op_reg_const(A_IN, NR_R0, $3F));
-            list.concat(taicpu.op_reg(A_PUSH, NR_R0));
-            cg.ungetcpuregister(list,NR_R0);
 
-            list.concat(taicpu.op_reg(A_CLR,NR_R1));
+            list.concat(taicpu.op_reg(A_CLR,GetDefaultZeroReg));
 
             if current_procinfo.framepointer<>NR_NO then
               begin
@@ -2208,19 +2318,23 @@ unit cgcpu;
                   end;
 
                 { we clear the zero register (r1, or r17 when CPUAVR_16_REGS is set) }
-                include(regs,RS_R1);
+                include(regs,getsupreg(GetDefaultZeroReg));
 
-                { Reload SREG }
-                regs:=regs+[RS_R0];
+                if current_settings.cputype<>cpu_avr1 then
+                  begin
+                    { Reload SREG }
+                    regs:=regs+[getsupreg(GetDefaultTmpReg)];
 
-                cg.getcpuregister(list,NR_R0);
-                list.concat(taicpu.op_reg(A_POP, NR_R0));
-                list.concat(taicpu.op_const_reg(A_OUT, $3F, NR_R0));
-                cg.ungetcpuregister(list,NR_R0);
 
-                for reg:=RS_R0 to RS_R31 do
-                  if reg in regs then
-                    list.concat(taicpu.op_reg(A_POP,newreg(R_INTREGISTER,reg,R_SUBWHOLE)));
+                    cg.getcpuregister(list,GetDefaultTmpReg);
+                    list.concat(taicpu.op_reg(A_POP, GetDefaultTmpReg));
+                    list.concat(taicpu.op_const_reg(A_OUT, $3F, GetDefaultTmpReg));
+                    cg.ungetcpuregister(list,GetDefaultTmpReg);
+
+                    for reg:=RS_R0 to RS_R31 do
+                      if reg in regs then
+                        list.concat(taicpu.op_reg(A_POP,newreg(R_INTREGISTER,reg,R_SUBWHOLE)));
+                  end;
               end;
             list.concat(taicpu.op_none(A_RETI));
           end
@@ -2312,9 +2426,9 @@ unit cgcpu;
         paraloc1.init;
         paraloc2.init;
         paraloc3.init;
-        paramanager.getintparaloc(list,pd,1,paraloc1);
-        paramanager.getintparaloc(list,pd,2,paraloc2);
-        paramanager.getintparaloc(list,pd,3,paraloc3);
+        paramanager.getcgtempparaloc(list,pd,1,paraloc1);
+        paramanager.getcgtempparaloc(list,pd,2,paraloc2);
+        paramanager.getcgtempparaloc(list,pd,3,paraloc3);
         a_load_const_cgpara(list,OS_SINT,len,paraloc3);
         a_loadaddr_ref_cgpara(list,dest,paraloc2);
         a_loadaddr_ref_cgpara(list,source,paraloc1);
@@ -2391,34 +2505,45 @@ unit cgcpu;
             cg.getcpuregister(list,NR_R26);
             list.concat(taicpu.op_reg(A_POP,NR_R26));
             cg.a_label(list,l);
-            cg.getcpuregister(list,NR_R0);
-            list.concat(taicpu.op_reg_ref(GetLoad(srcref),NR_R0,srcref));
-            list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,NR_R0));
-            cg.ungetcpuregister(list,NR_R0);
-            list.concat(taicpu.op_reg(A_DEC,countreg));
+            cg.getcpuregister(list,GetDefaultTmpReg);
+            list.concat(taicpu.op_reg_ref(GetLoad(srcref),GetDefaultTmpReg,srcref));
+            list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,GetDefaultTmpReg));
+            cg.ungetcpuregister(list,GetDefaultTmpReg);
+            if tcgsize2size[countregsize] = 1 then
+              list.concat(taicpu.op_reg(A_DEC,countreg))
+            else
+              begin
+                list.concat(taicpu.op_reg_const(A_SUBI,countreg,1));
+                list.concat(taicpu.op_reg_reg(A_SBC,GetNextReg(countreg),GetDefaultZeroReg));
+              end;
+
             a_jmp_flags(list,F_NE,l);
             cg.ungetcpuregister(list,NR_R26);
             cg.ungetcpuregister(list,NR_R27);
             cg.ungetcpuregister(list,NR_R30);
             cg.ungetcpuregister(list,NR_R31);
-            // keep registers alive
-            list.concat(taicpu.op_reg_reg(A_MOV,countreg,countreg));
+            { keep registers alive }
+            a_reg_sync(list,countreg);
           end
         else
           begin
             SrcQuickRef:=false;
             DestQuickRef:=false;
-            if not((source.addressmode=AM_UNCHANGED) and
-                   (source.symbol=nil) and
-                   ((source.base=NR_R28) or
-                    (source.base=NR_R30)) and
-                    (source.Index=NR_NO) and
-                    (source.Offset in [0..64-len])) and
-              not((source.Base=NR_NO) and (source.Index=NR_NO)) then
+            if ((CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype]) and
+              not((source.Base=NR_NO) and (source.Index=NR_NO) and (source.symbol=nil) and (source.Offset in [0..192-len]))) or
+              (
+                 not((source.addressmode=AM_UNCHANGED) and
+                     (source.symbol=nil) and
+                     ((source.base=NR_R28) or
+                      (source.base=NR_R30)) and
+                      (source.Index=NR_NO) and
+                      (source.Offset in [0..64-len])) and
+                not((source.Base=NR_NO) and (source.Index=NR_NO))
+              ) then
               begin
                 cg.getcpuregister(list,NR_R30);
                 cg.getcpuregister(list,NR_R31);
-                srcref:=normalize_ref(list,source,NR_R30)
+                srcref:=normalize_ref(list,source,NR_R30);
               end
             else
               begin
@@ -2426,13 +2551,17 @@ unit cgcpu;
                 srcref:=source;
               end;
 
-            if not((dest.addressmode=AM_UNCHANGED) and
+            if ((CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype]) and
+              not((dest.Base=NR_NO) and (dest.Index=NR_NO) and (dest.symbol=nil) and (dest.Offset in [0..192-len]))) or
+              (
+                 not((dest.addressmode=AM_UNCHANGED) and
                    (dest.symbol=nil) and
                    ((dest.base=NR_R28) or
                     (dest.base=NR_R30)) and
                     (dest.Index=NR_No) and
                     (dest.Offset in [0..64-len])) and
-              not((dest.Base=NR_NO) and (dest.Index=NR_NO)) then
+                not((dest.Base=NR_NO) and (dest.Index=NR_NO))
+              ) then
               begin
                 if not(SrcQuickRef) then
                   begin
@@ -2479,10 +2608,11 @@ unit cgcpu;
 
               // CC
               // If dest is an ioreg (31 < offset < srambase) and size = 16 bit then
-              // load high byte first, then low byte
-              if (len = 2) and DestQuickRef
-                and (dest.offset > 31)
-                and (dest.offset < cpuinfo.embedded_controllers[current_settings.controllertype].srambase) then
+              // write high byte first, then low byte
+              // but not for avrxmega3
+              if (len = 2) and DestQuickRef and (current_settings.cputype <> cpu_avrxmega3) and
+                (dest.offset > 31) and
+                (dest.offset < cpuinfo.embedded_controllers[current_settings.controllertype].srambase) then
                 begin
                   // If src is also a 16 bit ioreg then read low byte then high byte
                   if SrcQuickRef and (srcref.offset > 31)
@@ -2495,39 +2625,39 @@ unit cgcpu;
                       tmpreg2:=GetNextReg(tmpreg);
                       list.concat(taicpu.op_reg_ref(GetLoad(srcref),tmpreg2,srcref));
 
-                      // then move temp registers to dest in reverse order
-                      inc(dstref.offset);
-                      list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,tmpreg2));
-                      dec(dstref.offset);
-                      list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,tmpreg));
-                    end
-                  else
-                    begin
+                    // then move temp registers to dest in reverse order
+                    inc(dstref.offset);
+                    list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,tmpreg2));
+                    dec(dstref.offset);
+                    list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,tmpreg));
+                  end
+                else
+                  begin
+                    srcref.addressmode:=AM_UNCHANGED;
+                    inc(srcref.offset);
+                    dstref.addressmode:=AM_UNCHANGED;
+                    inc(dstref.offset);
+
+                    cg.getcpuregister(list,GetDefaultTmpReg);
+                    list.concat(taicpu.op_reg_ref(GetLoad(srcref),GetDefaultTmpReg,srcref));
+                    list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,GetDefaultTmpReg));
+                    cg.ungetcpuregister(list,GetDefaultTmpReg);
+
+                    if not(SrcQuickRef) then
+                      srcref.addressmode:=AM_POSTINCREMENT
+                    else
                       srcref.addressmode:=AM_UNCHANGED;
-                      inc(srcref.offset);
-                      dstref.addressmode:=AM_UNCHANGED;
-                      inc(dstref.offset);
-
-                      cg.getcpuregister(list,NR_R0);
-                      list.concat(taicpu.op_reg_ref(GetLoad(srcref),NR_R0,srcref));
-                      list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,NR_R0));
-                      cg.ungetcpuregister(list,NR_R0);
-
-                      if not(SrcQuickRef) then
-                        srcref.addressmode:=AM_POSTINCREMENT
-                      else
-                        srcref.addressmode:=AM_UNCHANGED;
-
-                      dec(srcref.offset);
-                      dec(dstref.offset);
-
-                      cg.getcpuregister(list,NR_R0);
-                      list.concat(taicpu.op_reg_ref(GetLoad(srcref),NR_R0,srcref));
-                      list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,NR_R0));
-                      cg.ungetcpuregister(list,NR_R0);
-                    end;
-                end
-              else
+
+                    dec(srcref.offset);
+                    dec(dstref.offset);
+
+                    cg.getcpuregister(list,GetDefaultTmpReg);
+                    list.concat(taicpu.op_reg_ref(GetLoad(srcref),GetDefaultTmpReg,srcref));
+                    list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,GetDefaultTmpReg));
+                    cg.ungetcpuregister(list,GetDefaultTmpReg);
+                  end;
+              end
+            else
               for i:=1 to len do
                 begin
                   if not(SrcQuickRef) and (i<len) then
@@ -2540,10 +2670,10 @@ unit cgcpu;
                   else
                     dstref.addressmode:=AM_UNCHANGED;
 
-                  cg.getcpuregister(list,NR_R0);
-                  list.concat(taicpu.op_reg_ref(GetLoad(srcref),NR_R0,srcref));
-                  list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,NR_R0));
-                  cg.ungetcpuregister(list,NR_R0);
+                  cg.getcpuregister(list,GetDefaultTmpReg);
+                  list.concat(taicpu.op_reg_ref(GetLoad(srcref),GetDefaultTmpReg,srcref));
+                  list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,GetDefaultTmpReg));
+                  cg.ungetcpuregister(list,GetDefaultTmpReg);
 
                   if SrcQuickRef then
                     inc(srcref.offset);
@@ -2732,6 +2862,15 @@ unit cgcpu;
       end;
 
 
+   procedure tcg64favr.a_op64_const_reg_reg(list: TAsmList; op: TOpCg;size: tcgsize;value: int64;src,dst : tregister64);
+      begin { applies op with the constant value to the register pair src, result in dst }
+        if op in [OP_SHL,OP_SHR] then { constant shifts: pass the lo/hi halves of src and dst to the tcgavr helper }
+          tcgavr(cg).a_op_const_reg_reg_internal(list,Op,size,value,src.reglo,src.reghi,dst.reglo,dst.reghi)
+        else { every other operation is left to the inherited implementation }
+          Inherited a_op64_const_reg_reg(list,op,size,value,src,dst);
+      end;
+
+
     procedure create_codegen;
       begin
         cg:=tcgavr.create;

+ 46 - 7
compiler/avr/cpubase.pas

@@ -111,8 +111,6 @@ unit cpubase;
       first_mm_supreg    = RS_INVALID;
       first_mm_imreg     = 0;
 
-      regnumber_count_bsstart = 32;
-
       regnumber_table : array[tregisterindex] of tregister = (
         {$i ravrnum.inc}
       );
@@ -128,9 +126,6 @@ unit cpubase;
       VOLATILE_INTREGISTERS = [RS_R0,RS_R1,RS_R18..RS_R27,RS_R30,RS_R31];
       VOLATILE_FPUREGISTERS = [];
 
-    type
-      totherregisterset = set of tregisterindex;
-
 {*****************************************************************************
                                 Conditions
 *****************************************************************************}
@@ -232,8 +227,8 @@ unit cpubase;
 *****************************************************************************}
 
       { Stack pointer register }
-      NR_STACK_POINTER_REG = NR_R13;
-      RS_STACK_POINTER_REG = RS_R13;
+      NR_STACK_POINTER_REG = NR_INVALID;
+      RS_STACK_POINTER_REG = RS_INVALID;
       { Frame pointer register }
       RS_FRAME_POINTER_REG = RS_R28;
       NR_FRAME_POINTER_REG = NR_R28;
@@ -303,6 +298,9 @@ unit cpubase;
     function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function conditions_equal(const c1, c2: TAsmCond): boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal") }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+
     function dwarf_reg(r:tregister):byte;
     function dwarf_reg_no_error(r:tregister):shortint;
     function eh_return_data_regno(nr: longint): longint;
@@ -310,6 +308,9 @@ unit cpubase;
 
     function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
 
+    function GetDefaultTmpReg : TRegister;
+    function GetDefaultZeroReg : TRegister;
+
   implementation
 
     uses
@@ -413,6 +414,24 @@ unit cpubase;
       end;
 
 
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal"). Returns True when c is C_None, when both conditions are equal, or for the pairs listed in the case statement; False otherwise. }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+      begin
+        Result := (c = C_None) or conditions_equal(Subset, c); { trivial cases: c is C_None, or both conditions are the same }
+
+        { Please update as necessary. [Kit] }
+        if not Result then
+          case Subset of
+            C_EQ: { "equal" is accepted as a subset of "greater or equal" }
+              Result := (c in [C_GE]);
+            C_LT: { "less than" is accepted as a subset of "not equal" }
+              Result := (c in [C_NE]);
+            else { no other subset relation is known to this routine }
+              Result := False;
+          end;
+      end;
+
+
     function rotl(d : dword;b : byte) : dword;
       begin
          result:=(d shr (32-b)) or (d shl b);
@@ -429,11 +448,13 @@ unit cpubase;
         result:=reg;
       end;
 
+
     function dwarf_reg_no_error(r:tregister):shortint;
       begin
         result:=regdwarf_table[findreg_by_number(r)]; { look up r's table index, then return the regdwarf_table entry stored at that index }
       end;
 
+
     function eh_return_data_regno(nr: longint): longint;
       begin
         result:=-1;
@@ -446,4 +467,22 @@ unit cpubase;
       end;
 
 
+    function GetDefaultTmpReg: TRegister;
+      begin { returns the default temporary register for the currently selected CPU type }
+        if CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype] then
+          Result:=NR_R16 { CPU types flagged CPUAVR_16_REGS use R16; presumably R0 does not exist there - TODO confirm }
+        else
+          Result:=NR_R0; { all other CPU types use R0 }
+      end;
+
+
+    function GetDefaultZeroReg: TRegister;
+      begin { returns the register treated as the zero register for the currently selected CPU type }
+        if CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype] then
+          Result:=NR_R17 { CPU types flagged CPUAVR_16_REGS use R17; presumably R1 does not exist there - TODO confirm }
+        else
+          Result:=NR_R1; { all other CPU types use R1 }
+      end;
+
+
 end.

+ 442 - 284
compiler/avr/cpuinfo.pas

@@ -37,6 +37,7 @@ Type
    { possible supported processors for this target }
    tcputype =
       (cpu_none,
+       cpu_avrtiny,
        cpu_avr1,
        cpu_avr2,
        cpu_avr25,
@@ -46,7 +47,8 @@ Type
        cpu_avr4,
        cpu_avr5,
        cpu_avr51,
-       cpu_avr6
+       cpu_avr6,
+       cpu_avrxmega3
       );
 
    tfputype =
@@ -60,150 +62,225 @@ Type
 
       ct_avrsim,
 
-      ct_atmega645,
-      ct_atmega165a,
-      ct_attiny44a,
-      ct_atmega649a,
-      ct_atmega32u4,
-      ct_attiny26,
-      ct_at90usb1287,
+      ct_at90can32,
+      ct_at90can64,
+      ct_at90can128,
+      ct_at90pwm1,
+      ct_at90pwm2b,
+      ct_at90pwm3b,
+      ct_at90pwm81,
       ct_at90pwm161,
-      ct_attiny48,
-      ct_atmega168p,
-      ct_attiny10,
-      ct_attiny84a,
+      ct_at90pwm216,
+      ct_at90pwm316,
       ct_at90usb82,
-      ct_attiny2313,
-      ct_attiny461,
-      ct_atmega3250pa,
-      ct_atmega3290a,
-      ct_atmega165p,
-      ct_attiny43u,
       ct_at90usb162,
-      ct_atmega16u4,
-      ct_attiny24a,
-      ct_atmega88p,
-      ct_attiny88,
-      ct_atmega6490p,
-      ct_attiny40,
-      ct_atmega324p,
-      ct_attiny167,
-      ct_atmega328,
-      ct_attiny861,
-      ct_attiny85,
-      ct_atmega64m1,
-      ct_atmega645p,
-      ct_atmega8u2,
-      ct_atmega329a,
-      ct_atmega8a,
-      ct_atmega324pa,
-      ct_atmega32hvb,
-      ct_at90pwm316,
-      ct_at90pwm3b,
       ct_at90usb646,
-      ct_attiny20,
-      ct_atmega16,
-      ct_atmega48a,
-      ct_attiny24,
-      ct_atmega644,
-      ct_atmega1284,
+      ct_at90usb647,
+      ct_at90usb1286,
+      ct_at90usb1287,
       ct_ata6285,
-      ct_at90can64,
-      ct_atmega48,
-      ct_at90can32,
-      ct_attiny9,
-      ct_attiny87,
-      ct_atmega1281,
-      ct_at90pwm216,
-      ct_atmega3250a,
-      ct_atmega88a,
-      ct_atmega128rfa1,
-      ct_atmega3290pa,
-      ct_at90pwm81,
-      ct_atmega325p,
-      ct_attiny84,
-      ct_atmega328p,
-      ct_attiny13a,
+      ct_ata6286,
       ct_atmega8,
-      ct_atmega1284p,
+      ct_atmega8a,
+      ct_atmega8hva,
+      ct_atmega8u2,
+      ct_atmega16,
+      ct_atmega16a,
+      ct_atmega16hva,
+      ct_atmega16hvb,
+      ct_atmega16hvbrevb,
+      ct_atmega16m1,
       ct_atmega16u2,
-      ct_attiny45,
-      ct_atmega3250,
-      ct_atmega329,
+      ct_atmega16u4,
+      ct_atmega32,
       ct_atmega32a,
-      ct_attiny5,
-      ct_at90can128,
-      ct_atmega6490,
-      ct_atmega8515,
+      ct_atmega32c1,
+      ct_atmega32hvb,
+      ct_atmega32hvbrevb,
+      ct_atmega32m1,
+      ct_atmega32u2,
+      ct_atmega32u4,
+      ct_atmega48,
+      ct_atmega48a,
+      ct_atmega48p,
+      ct_atmega48pa,
+      ct_atmega48pb,
+      ct_atmega64,
+      ct_atmega64a,
+      ct_atmega64c1,
+      ct_atmega64hve2,
+      ct_atmega64m1,
+      ct_atmega64rfr2,
+      ct_atmega88,
+      ct_atmega88a,
+      ct_atmega88p,
       ct_atmega88pa,
-      ct_atmega168a,
+      ct_atmega88pb,
       ct_atmega128,
-      ct_at90usb1286,
-      ct_atmega164pa,
-      ct_attiny828,
-      ct_atmega88,
-      ct_atmega645a,
-      ct_atmega3290p,
-      ct_atmega644p,
-      ct_atmega164a,
-      ct_attiny4313,
-      ct_atmega162,
-      ct_atmega32c1,
       ct_atmega128a,
-      ct_atmega324a,
-      ct_attiny13,
-      ct_atmega2561,
+      ct_atmega128rfa1,
+      ct_atmega128rfr2,
+      ct_atmega162,
+      ct_atmega164a,
+      ct_atmega164p,
+      ct_atmega164pa,
+      ct_atmega165a,
+      ct_atmega165p,
+      ct_atmega165pa,
+      ct_atmega168,
+      ct_atmega168a,
+      ct_atmega168p,
+      ct_atmega168pa,
+      ct_atmega168pb,
       ct_atmega169a,
-      ct_attiny261,
-      ct_atmega644a,
-      ct_atmega3290,
-      ct_atmega64a,
       ct_atmega169p,
-      ct_atmega2560,
-      ct_atmega32,
-      ct_attiny861a,
-      ct_attiny28,
-      ct_atmega48p,
-      ct_atmega8535,
-      ct_atmega168pa,
-      ct_atmega16m1,
-      ct_atmega16hvb,
-      ct_atmega164p,
+      ct_atmega169pa,
+      ct_atmega256rfr2,
+      ct_atmega324a,
+      ct_atmega324p,
+      ct_atmega324pa,
+      ct_atmega324pb,
+      ct_atmega325,
       ct_atmega325a,
+      ct_atmega325p,
+      ct_atmega325pa,
+      ct_atmega328,
+      ct_atmega328p,
+      ct_atmega328pb,
+      ct_atmega329,
+      ct_atmega329a,
+      ct_atmega329p,
+      ct_atmega329pa,
+      ct_atmega406,
       ct_atmega640,
+      ct_atmega644,
+      ct_atmega644a,
+      ct_atmega644p,
+      ct_atmega644pa,
+      ct_atmega644rfr2,
+      ct_atmega645,
+      ct_atmega645a,
+      ct_atmega645p,
+      ct_atmega649,
+      ct_atmega649a,
+      ct_atmega649p,
+      ct_atmega808,
+      ct_atmega809,
+      ct_atmega1280,
+      ct_atmega1281,
+      ct_atmega1284,
+      ct_atmega1284p,
+      ct_atmega1284rfr2,
+      ct_atmega1608,
+      ct_atmega1609,
+      ct_atmega2560,
+      ct_atmega2561,
+      ct_atmega2564rfr2,
+      ct_atmega3208,
+      ct_atmega3209,
+      ct_atmega3250,
+      ct_atmega3250a,
+      ct_atmega3250p,
+      ct_atmega3250pa,
+      ct_atmega3290,
+      ct_atmega3290a,
+      ct_atmega3290p,
+      ct_atmega3290pa,
+      ct_atmega4808,
+      ct_atmega4809,
       ct_atmega6450,
-      ct_atmega329p,
-      ct_ata6286,
-      ct_at90usb647,
-      ct_atmega168,
+      ct_atmega6450a,
+      ct_atmega6450p,
+      ct_atmega6490,
       ct_atmega6490a,
-      ct_atmega32m1,
-      ct_atmega64c1,
-      ct_atmega32u2,
+      ct_atmega6490p,
+      ct_atmega8515,
+      ct_atmega8535,
       ct_attiny4,
-      ct_atmega644pa,
-      ct_at90pwm1,
+      ct_attiny5,
+      ct_attiny9,
+      ct_attiny10,
+      ct_attiny11,
+      ct_attiny12,
+      ct_attiny13,
+      ct_attiny13a,
+      ct_attiny15,
+      ct_attiny20,
+      ct_attiny24,
+      ct_attiny24a,
+      ct_attiny25,
+      ct_attiny26,
+      ct_attiny28,
+      ct_attiny40,
+      ct_attiny43u,
       ct_attiny44,
-      ct_atmega325pa,
-      ct_atmega6450a,
-      ct_attiny2313a,
-      ct_atmega329pa,
+      ct_attiny44a,
+      ct_attiny45,
+      ct_attiny48,
+      ct_attiny84,
+      ct_attiny84a,
+      ct_attiny85,
+      ct_attiny87,
+      ct_attiny88,
+      ct_attiny102,
+      ct_attiny104,
+      ct_attiny167,
+      ct_attiny202,
+      ct_attiny204,
+      ct_attiny212,
+      ct_attiny214,
+      ct_attiny261,
+      ct_attiny261a,
+      ct_attiny402,
+      ct_attiny404,
+      ct_attiny406,
+      ct_attiny412,
+      ct_attiny414,
+      ct_attiny416,
+      ct_attiny416auto,
+      ct_attiny417,
+      ct_attiny441,
+      ct_attiny461,
       ct_attiny461a,
-      ct_atmega6450p,
-      ct_atmega64,
-      ct_atmega165pa,
-      ct_atmega16a,
-      ct_atmega649,
-      ct_atmega1280,
-      ct_at90pwm2b,
-      ct_atmega649p,
-      ct_atmega3250p,
-      ct_atmega48pa,
+      ct_attiny804,
+      ct_attiny806,
+      ct_attiny807,
+      ct_attiny814,
+      ct_attiny816,
+      ct_attiny817,
+      ct_attiny828,
+      ct_attiny841,
+      ct_attiny861,
+      ct_attiny861a,
+      ct_attiny1604,
+      ct_attiny1606,
+      ct_attiny1607,
+      ct_attiny1614,
+      ct_attiny1616,
+      ct_attiny1617,
+      ct_attiny1624,
+      ct_attiny1626,
+      ct_attiny1627,
       ct_attiny1634,
-      ct_atmega325,
-      ct_atmega169pa,
-      ct_attiny261a,
-      ct_attiny25
+      ct_attiny2313,
+      ct_attiny2313a,
+      ct_attiny3214,
+      ct_attiny3216,
+      ct_attiny3217,
+      ct_attiny4313,
+      // Controller board aliases
+      ct_arduinoleonardo,
+      ct_arduinomega,
+      ct_arduinomicro,
+      ct_arduinonano,
+      ct_arduinonanoevery,
+      ct_arduinouno,
+      ct_atmega256rfr2xpro,
+      ct_atmega324pbxpro,
+      ct_atmega1284pxplained,
+      ct_atmega4809xpro,
+      ct_attiny817xpro,
+      ct_attiny3217xpro
      );
 
    tcontrollerdatatype = record
@@ -234,7 +311,8 @@ Const
      pocall_softfloat
    ];
 
-   cputypestr : array[tcputype] of string[5] = ('',
+   cputypestr : array[tcputype] of string[9] = ('',
+     'AVRTINY',
      'AVR1',
      'AVR2',
      'AVR25',
@@ -244,7 +322,8 @@ Const
      'AVR4',
      'AVR5',
      'AVR51',
-     'AVR6'
+     'AVR6',
+     'AVRXMEGA3'
    );
 
    fputypestr : array[tfputype] of string[6] = (
@@ -282,150 +361,225 @@ Const
         eeprombase:0;
         eepromsize:4096;
         )
-        ,(controllertypestr:'ATMEGA645'; controllerunitstr:'ATMEGA645'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA165A'; controllerunitstr:'ATMEGA165A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY44A'; controllerunitstr:'ATTINY44A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA649A'; controllerunitstr:'ATMEGA649A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA32U4'; controllerunitstr:'ATMEGA32U4'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2560; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY26'; controllerunitstr:'ATTINY26'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'AT90USB1287'; controllerunitstr:'AT90USB1287'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'AT90PWM161'; controllerunitstr:'AT90PWM161'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY48'; controllerunitstr:'ATTINY48'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:256; sramsize:256; eeprombase:0; eepromsize:64)
-        ,(controllertypestr:'ATMEGA168P'; controllerunitstr:'ATMEGA168P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY10'; controllerunitstr:'ATTINY10'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:1024; srambase:64; sramsize:32; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATTINY84A'; controllerunitstr:'ATTINY84A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'AT90USB82'; controllerunitstr:'AT90USB82'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY2313'; controllerunitstr:'ATTINY2313'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATTINY461'; controllerunitstr:'ATTINY461'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA3250PA'; controllerunitstr:'ATMEGA3250PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA3290A'; controllerunitstr:'ATMEGA3290A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA165P'; controllerunitstr:'ATMEGA165P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY43U'; controllerunitstr:'ATTINY43U'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:64)
-        ,(controllertypestr:'AT90USB162'; controllerunitstr:'AT90USB162'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA16U4'; controllerunitstr:'ATMEGA16U4'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1280; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY24A'; controllerunitstr:'ATTINY24A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATMEGA88P'; controllerunitstr:'ATMEGA88P'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY88'; controllerunitstr:'ATTINY88'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:64)
-        ,(controllertypestr:'ATMEGA6490P'; controllerunitstr:'ATMEGA6490P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATTINY40'; controllerunitstr:'ATTINY40'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:64; sramsize:256; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATMEGA324P'; controllerunitstr:'ATMEGA324P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY167'; controllerunitstr:'ATTINY167'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA328'; controllerunitstr:'ATMEGA328'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY861'; controllerunitstr:'ATTINY861'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY85'; controllerunitstr:'ATTINY85'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA64M1'; controllerunitstr:'ATMEGA64M1'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA645P'; controllerunitstr:'ATMEGA645P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA8U2'; controllerunitstr:'ATMEGA8U2'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA329A'; controllerunitstr:'ATMEGA329A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA8A'; controllerunitstr:'ATMEGA8A'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA324PA'; controllerunitstr:'ATMEGA324PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA32HVB'; controllerunitstr:'ATMEGA32HVB'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'AT90PWM316'; controllerunitstr:'AT90PWM316'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'AT90PWM3B'; controllerunitstr:'AT90PWM3B'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'AT90USB646'; controllerunitstr:'AT90USB646'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATTINY20'; controllerunitstr:'ATTINY20'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:64; sramsize:128; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATMEGA16'; controllerunitstr:'ATMEGA16'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:96; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA48A'; controllerunitstr:'ATMEGA48A'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:256; sramsize:512; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATTINY24'; controllerunitstr:'ATTINY24'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATMEGA644'; controllerunitstr:'ATMEGA644'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA1284'; controllerunitstr:'ATMEGA1284'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:16384; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATA6285'; controllerunitstr:'ATA6285'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:320)
-        ,(controllertypestr:'AT90CAN64'; controllerunitstr:'AT90CAN64'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA48'; controllerunitstr:'ATMEGA48'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:256; sramsize:512; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'AT90CAN32'; controllerunitstr:'AT90CAN32'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY9'; controllerunitstr:'ATTINY9'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:1024; srambase:64; sramsize:32; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATTINY87'; controllerunitstr:'ATTINY87'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA1281'; controllerunitstr:'ATMEGA1281'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'AT90PWM216'; controllerunitstr:'AT90PWM216'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA3250A'; controllerunitstr:'ATMEGA3250A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA88A'; controllerunitstr:'ATMEGA88A'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA128RFA1'; controllerunitstr:'ATMEGA128RFA1'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:512; sramsize:16384; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA3290PA'; controllerunitstr:'ATMEGA3290PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'AT90PWM81'; controllerunitstr:'AT90PWM81'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:256; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA325P'; controllerunitstr:'ATMEGA325P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY84'; controllerunitstr:'ATTINY84'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA328P'; controllerunitstr:'ATMEGA328P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY13A'; controllerunitstr:'ATTINY13A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:1024; srambase:96; sramsize:64; eeprombase:0; eepromsize:64)
-        ,(controllertypestr:'ATMEGA8'; controllerunitstr:'ATMEGA8'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA1284P'; controllerunitstr:'ATMEGA1284P'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:16384; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA16U2'; controllerunitstr:'ATMEGA16U2'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY45'; controllerunitstr:'ATTINY45'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA3250'; controllerunitstr:'ATMEGA3250'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA329'; controllerunitstr:'ATMEGA329'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA32A'; controllerunitstr:'ATMEGA32A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:96; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY5'; controllerunitstr:'ATTINY5'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:512; srambase:64; sramsize:32; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'AT90CAN128'; controllerunitstr:'AT90CAN128'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:4096; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA6490'; controllerunitstr:'ATMEGA6490'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA8515'; controllerunitstr:'ATMEGA8515'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA88PA'; controllerunitstr:'ATMEGA88PA'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA168A'; controllerunitstr:'ATMEGA168A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA128'; controllerunitstr:'ATMEGA128'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:4096; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'AT90USB1286'; controllerunitstr:'AT90USB1286'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA164PA'; controllerunitstr:'ATMEGA164PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY828'; controllerunitstr:'ATTINY828'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA88'; controllerunitstr:'ATMEGA88'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA645A'; controllerunitstr:'ATMEGA645A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA3290P'; controllerunitstr:'ATMEGA3290P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA644P'; controllerunitstr:'ATMEGA644P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA164A'; controllerunitstr:'ATMEGA164A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY4313'; controllerunitstr:'ATTINY4313'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA162'; controllerunitstr:'ATMEGA162'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA32C1'; controllerunitstr:'ATMEGA32C1'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA128A'; controllerunitstr:'ATMEGA128A'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:4096; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA324A'; controllerunitstr:'ATMEGA324A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY13'; controllerunitstr:'ATTINY13'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:1024; srambase:96; sramsize:64; eeprombase:0; eepromsize:64)
-        ,(controllertypestr:'ATMEGA2561'; controllerunitstr:'ATMEGA2561'; cputype: cpu_avr6; fputype:fpu_soft; flashbase:0; flashsize:262144; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA169A'; controllerunitstr:'ATMEGA169A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY261'; controllerunitstr:'ATTINY261'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATMEGA644A'; controllerunitstr:'ATMEGA644A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA3290'; controllerunitstr:'ATMEGA3290'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA64A'; controllerunitstr:'ATMEGA64A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA169P'; controllerunitstr:'ATMEGA169P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA2560'; controllerunitstr:'ATMEGA2560'; cputype: cpu_avr6; fputype:fpu_soft; flashbase:0; flashsize:262144; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA32'; controllerunitstr:'ATMEGA32'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:96; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY861A'; controllerunitstr:'ATTINY861A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY28'; controllerunitstr:'ATTINY28'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:0; sramsize:0; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATMEGA48P'; controllerunitstr:'ATMEGA48P'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:256; sramsize:512; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA8535'; controllerunitstr:'ATMEGA8535'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA168PA'; controllerunitstr:'ATMEGA168PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA16M1'; controllerunitstr:'ATMEGA16M1'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA16HVB'; controllerunitstr:'ATMEGA16HVB'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA164P'; controllerunitstr:'ATMEGA164P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA325A'; controllerunitstr:'ATMEGA325A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA640'; controllerunitstr:'ATMEGA640'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA6450'; controllerunitstr:'ATMEGA6450'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA329P'; controllerunitstr:'ATMEGA329P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATA6286'; controllerunitstr:'ATA6286'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:320)
-        ,(controllertypestr:'AT90USB647'; controllerunitstr:'AT90USB647'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA168'; controllerunitstr:'ATMEGA168'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA6490A'; controllerunitstr:'ATMEGA6490A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA32M1'; controllerunitstr:'ATMEGA32M1'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA64C1'; controllerunitstr:'ATMEGA64C1'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA32U2'; controllerunitstr:'ATMEGA32U2'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:1024; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY4'; controllerunitstr:'ATTINY4'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:512; srambase:64; sramsize:32; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATMEGA644PA'; controllerunitstr:'ATMEGA644PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'AT90PWM1'; controllerunitstr:'AT90PWM1'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY44'; controllerunitstr:'ATTINY44'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA325PA'; controllerunitstr:'ATMEGA325PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA6450A'; controllerunitstr:'ATMEGA6450A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATTINY2313A'; controllerunitstr:'ATTINY2313A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATMEGA329PA'; controllerunitstr:'ATMEGA329PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY461A'; controllerunitstr:'ATTINY461A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA6450P'; controllerunitstr:'ATMEGA6450P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA64'; controllerunitstr:'ATMEGA64'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA165PA'; controllerunitstr:'ATMEGA165PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA16A'; controllerunitstr:'ATMEGA16A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:96; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA649'; controllerunitstr:'ATMEGA649'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA1280'; controllerunitstr:'ATMEGA1280'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'AT90PWM2B'; controllerunitstr:'AT90PWM2B'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA649P'; controllerunitstr:'ATMEGA649P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA3250P'; controllerunitstr:'ATMEGA3250P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA48PA'; controllerunitstr:'ATMEGA48PA'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:256; sramsize:512; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATTINY1634'; controllerunitstr:'ATTINY1634'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA325'; controllerunitstr:'ATMEGA325'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA169PA'; controllerunitstr:'ATMEGA169PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY261A'; controllerunitstr:'ATTINY261A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATTINY25'; controllerunitstr:'ATTINY25'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
+        ,(controllertypestr:'AT90CAN32';controllerunitstr:'AT90CAN32';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'AT90CAN64';controllerunitstr:'AT90CAN64';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'AT90CAN128';controllerunitstr:'AT90CAN128';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:4096;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'AT90PWM1';controllerunitstr:'AT90PWM1';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM2B';controllerunitstr:'AT90PWM2B';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM3B';controllerunitstr:'AT90PWM3B';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM81';controllerunitstr:'AT90PWM81';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:256;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM161';controllerunitstr:'AT90PWM161';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM216';controllerunitstr:'AT90PWM216';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM316';controllerunitstr:'AT90PWM316';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90USB82';controllerunitstr:'AT90USB82';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90USB162';controllerunitstr:'AT90USB162';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90USB646';controllerunitstr:'AT90USB646';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'AT90USB647';controllerunitstr:'AT90USB647';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'AT90USB1286';controllerunitstr:'AT90USB1286';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'AT90USB1287';controllerunitstr:'AT90USB1287';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATA6285';controllerunitstr:'ATA6285';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:320)
+        ,(controllertypestr:'ATA6286';controllerunitstr:'ATA6286';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:320)
+        ,(controllertypestr:'ATMEGA8';controllerunitstr:'ATMEGA8';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA8A';controllerunitstr:'ATMEGA8A';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA8HVA';controllerunitstr:'ATMEGA8HVA';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA8U2';controllerunitstr:'ATMEGA8U2';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16';controllerunitstr:'ATMEGA16';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:96;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16A';controllerunitstr:'ATMEGA16A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:96;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16HVA';controllerunitstr:'ATMEGA16HVA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA16HVB';controllerunitstr:'ATMEGA16HVB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16HVBREVB';controllerunitstr:'ATMEGA16HVBREVB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16M1';controllerunitstr:'ATMEGA16M1';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16U2';controllerunitstr:'ATMEGA16U2';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16U4';controllerunitstr:'ATMEGA16U4';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1280;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA32';controllerunitstr:'ATMEGA32';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:96;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32A';controllerunitstr:'ATMEGA32A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:96;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32C1';controllerunitstr:'ATMEGA32C1';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32HVB';controllerunitstr:'ATMEGA32HVB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32HVBREVB';controllerunitstr:'ATMEGA32HVBREVB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32M1';controllerunitstr:'ATMEGA32M1';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32U2';controllerunitstr:'ATMEGA32U2';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:1024;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32U4';controllerunitstr:'ATMEGA32U4';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2560;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA48';controllerunitstr:'ATMEGA48';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA48A';controllerunitstr:'ATMEGA48A';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA48P';controllerunitstr:'ATMEGA48P';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA48PA';controllerunitstr:'ATMEGA48PA';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA48PB';controllerunitstr:'ATMEGA48PB';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA64';controllerunitstr:'ATMEGA64';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA64A';controllerunitstr:'ATMEGA64A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA64C1';controllerunitstr:'ATMEGA64C1';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA64HVE2';controllerunitstr:'ATMEGA64HVE2';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA64M1';controllerunitstr:'ATMEGA64M1';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA64RFR2';controllerunitstr:'ATMEGA64RFR2';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:512;sramsize:8192;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA88';controllerunitstr:'ATMEGA88';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA88A';controllerunitstr:'ATMEGA88A';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA88P';controllerunitstr:'ATMEGA88P';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA88PA';controllerunitstr:'ATMEGA88PA';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA88PB';controllerunitstr:'ATMEGA88PB';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA128';controllerunitstr:'ATMEGA128';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:4096;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA128A';controllerunitstr:'ATMEGA128A';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:4096;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA128RFA1';controllerunitstr:'ATMEGA128RFA1';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:512;sramsize:16384;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA128RFR2';controllerunitstr:'ATMEGA128RFR2';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:512;sramsize:16384;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA162';controllerunitstr:'ATMEGA162';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA164A';controllerunitstr:'ATMEGA164A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA164P';controllerunitstr:'ATMEGA164P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA164PA';controllerunitstr:'ATMEGA164PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA165A';controllerunitstr:'ATMEGA165A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA165P';controllerunitstr:'ATMEGA165P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA165PA';controllerunitstr:'ATMEGA165PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA168';controllerunitstr:'ATMEGA168';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA168A';controllerunitstr:'ATMEGA168A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA168P';controllerunitstr:'ATMEGA168P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA168PA';controllerunitstr:'ATMEGA168PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA168PB';controllerunitstr:'ATMEGA168PB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA169A';controllerunitstr:'ATMEGA169A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA169P';controllerunitstr:'ATMEGA169P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA169PA';controllerunitstr:'ATMEGA169PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA256RFR2';controllerunitstr:'ATMEGA256RFR2';cputype:cpu_avr6;fputype:fpu_soft;flashbase:0;flashsize:262144;srambase:512;sramsize:32768;eeprombase:0;eepromsize:8192)
+        ,(controllertypestr:'ATMEGA324A';controllerunitstr:'ATMEGA324A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA324P';controllerunitstr:'ATMEGA324P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA324PA';controllerunitstr:'ATMEGA324PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA324PB';controllerunitstr:'ATMEGA324PB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA325';controllerunitstr:'ATMEGA325';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA325A';controllerunitstr:'ATMEGA325A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA325P';controllerunitstr:'ATMEGA325P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA325PA';controllerunitstr:'ATMEGA325PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA328';controllerunitstr:'ATMEGA328';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA328P';controllerunitstr:'ATMEGA328P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA328PB';controllerunitstr:'ATMEGA328PB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA329';controllerunitstr:'ATMEGA329';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA329A';controllerunitstr:'ATMEGA329A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA329P';controllerunitstr:'ATMEGA329P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA329PA';controllerunitstr:'ATMEGA329PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA406';controllerunitstr:'ATMEGA406';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:40960;srambase:256;sramsize:2048;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA640';controllerunitstr:'ATMEGA640';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:512;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA644';controllerunitstr:'ATMEGA644';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA644A';controllerunitstr:'ATMEGA644A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA644P';controllerunitstr:'ATMEGA644P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA644PA';controllerunitstr:'ATMEGA644PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA644RFR2';controllerunitstr:'ATMEGA644RFR2';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:512;sramsize:8192;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA645';controllerunitstr:'ATMEGA645';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA645A';controllerunitstr:'ATMEGA645A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA645P';controllerunitstr:'ATMEGA645P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA649';controllerunitstr:'ATMEGA649';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA649A';controllerunitstr:'ATMEGA649A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA649P';controllerunitstr:'ATMEGA649P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA808';controllerunitstr:'ATMEGA808';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15360;sramsize:1024;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA809';controllerunitstr:'ATMEGA809';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15360;sramsize:1024;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA1280';controllerunitstr:'ATMEGA1280';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:512;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA1281';controllerunitstr:'ATMEGA1281';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:512;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA1284';controllerunitstr:'ATMEGA1284';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:16384;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA1284P';controllerunitstr:'ATMEGA1284P';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:16384;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA1284RFR2';controllerunitstr:'ATMEGA1284RFR2';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:512;sramsize:16384;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA1608';controllerunitstr:'ATMEGA1608';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA1609';controllerunitstr:'ATMEGA1609';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA2560';controllerunitstr:'ATMEGA2560';cputype:cpu_avr6;fputype:fpu_soft;flashbase:0;flashsize:262144;srambase:512;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA2561';controllerunitstr:'ATMEGA2561';cputype:cpu_avr6;fputype:fpu_soft;flashbase:0;flashsize:262144;srambase:512;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA2564RFR2';controllerunitstr:'ATMEGA2564RFR2';cputype:cpu_avr6;fputype:fpu_soft;flashbase:0;flashsize:262144;srambase:512;sramsize:32768;eeprombase:0;eepromsize:8192)
+        ,(controllertypestr:'ATMEGA3208';controllerunitstr:'ATMEGA3208';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:12288;sramsize:4096;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA3209';controllerunitstr:'ATMEGA3209';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:12288;sramsize:4096;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA3250';controllerunitstr:'ATMEGA3250';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3250A';controllerunitstr:'ATMEGA3250A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3250P';controllerunitstr:'ATMEGA3250P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3250PA';controllerunitstr:'ATMEGA3250PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3290';controllerunitstr:'ATMEGA3290';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3290A';controllerunitstr:'ATMEGA3290A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3290P';controllerunitstr:'ATMEGA3290P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3290PA';controllerunitstr:'ATMEGA3290PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA4808';controllerunitstr:'ATMEGA4808';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:49152;srambase:10240;sramsize:6144;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA4809';controllerunitstr:'ATMEGA4809';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:49152;srambase:10240;sramsize:6144;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA6450';controllerunitstr:'ATMEGA6450';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA6450A';controllerunitstr:'ATMEGA6450A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA6450P';controllerunitstr:'ATMEGA6450P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA6490';controllerunitstr:'ATMEGA6490';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA6490A';controllerunitstr:'ATMEGA6490A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA6490P';controllerunitstr:'ATMEGA6490P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA8515';controllerunitstr:'ATMEGA8515';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA8535';controllerunitstr:'ATMEGA8535';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY4';controllerunitstr:'ATTINY4';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:512;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY5';controllerunitstr:'ATTINY5';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:512;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY9';controllerunitstr:'ATTINY9';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY10';controllerunitstr:'ATTINY10';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY11';controllerunitstr:'ATTINY11';cputype:cpu_avr1;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:0;sramsize:0;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY12';controllerunitstr:'ATTINY12';cputype:cpu_avr1;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:0;sramsize:0;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY13';controllerunitstr:'ATTINY13';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:96;sramsize:64;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY13A';controllerunitstr:'ATTINY13A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:96;sramsize:64;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY15';controllerunitstr:'ATTINY15';cputype:cpu_avr1;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:0;sramsize:0;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY20';controllerunitstr:'ATTINY20';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:64;sramsize:128;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY24';controllerunitstr:'ATTINY24';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY24A';controllerunitstr:'ATTINY24A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY25';controllerunitstr:'ATTINY25';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY26';controllerunitstr:'ATTINY26';cputype:cpu_avr2;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY28';controllerunitstr:'ATTINY28';cputype:cpu_avr1;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:0;sramsize:0;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY40';controllerunitstr:'ATTINY40';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:64;sramsize:256;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY43U';controllerunitstr:'ATTINY43U';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY44';controllerunitstr:'ATTINY44';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY44A';controllerunitstr:'ATTINY44A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY45';controllerunitstr:'ATTINY45';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY48';controllerunitstr:'ATTINY48';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:256;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY84';controllerunitstr:'ATTINY84';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY84A';controllerunitstr:'ATTINY84A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY85';controllerunitstr:'ATTINY85';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY87';controllerunitstr:'ATTINY87';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY88';controllerunitstr:'ATTINY88';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY102';controllerunitstr:'ATTINY102';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY104';controllerunitstr:'ATTINY104';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY167';controllerunitstr:'ATTINY167';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY202';controllerunitstr:'ATTINY202';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:16256;sramsize:128;eeprombase:5120;eepromsize:64)
+        ,(controllertypestr:'ATTINY204';controllerunitstr:'ATTINY204';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:16256;sramsize:128;eeprombase:5120;eepromsize:64)
+        ,(controllertypestr:'ATTINY212';controllerunitstr:'ATTINY212';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:16256;sramsize:128;eeprombase:5120;eepromsize:64)
+        ,(controllertypestr:'ATTINY214';controllerunitstr:'ATTINY214';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:16256;sramsize:128;eeprombase:5120;eepromsize:64)
+        ,(controllertypestr:'ATTINY261';controllerunitstr:'ATTINY261';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY261A';controllerunitstr:'ATTINY261A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY402';controllerunitstr:'ATTINY402';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY404';controllerunitstr:'ATTINY404';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY406';controllerunitstr:'ATTINY406';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY412';controllerunitstr:'ATTINY412';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY414';controllerunitstr:'ATTINY414';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY416';controllerunitstr:'ATTINY416';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY416AUTO';controllerunitstr:'ATTINY416AUTO';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY417';controllerunitstr:'ATTINY417';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY441';controllerunitstr:'ATTINY441';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY461';controllerunitstr:'ATTINY461';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY461A';controllerunitstr:'ATTINY461A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY804';controllerunitstr:'ATTINY804';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY806';controllerunitstr:'ATTINY806';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY807';controllerunitstr:'ATTINY807';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY814';controllerunitstr:'ATTINY814';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY816';controllerunitstr:'ATTINY816';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY817';controllerunitstr:'ATTINY817';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY828';controllerunitstr:'ATTINY828';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY841';controllerunitstr:'ATTINY841';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY861';controllerunitstr:'ATTINY861';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY861A';controllerunitstr:'ATTINY861A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY1604';controllerunitstr:'ATTINY1604';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:15360;sramsize:1024;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1606';controllerunitstr:'ATTINY1606';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:15360;sramsize:1024;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1607';controllerunitstr:'ATTINY1607';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:15360;sramsize:1024;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1614';controllerunitstr:'ATTINY1614';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1616';controllerunitstr:'ATTINY1616';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1617';controllerunitstr:'ATTINY1617';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1624';controllerunitstr:'ATTINY1624';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1626';controllerunitstr:'ATTINY1626';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1627';controllerunitstr:'ATTINY1627';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1634';controllerunitstr:'ATTINY1634';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY2313';controllerunitstr:'ATTINY2313';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY2313A';controllerunitstr:'ATTINY2313A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY3214';controllerunitstr:'ATTINY3214';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY3216';controllerunitstr:'ATTINY3216';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY3217';controllerunitstr:'ATTINY3217';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY4313';controllerunitstr:'ATTINY4313';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        // Controller board aliases
+        ,(controllertypestr:'ARDUINOLEONARDO'; controllerunitstr:'ATMEGA32U4'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2560; eeprombase:0; eepromsize:1024)
+        ,(controllertypestr:'ARDUINOMEGA'; controllerunitstr:'ATMEGA2560'; cputype: cpu_avr6; fputype:fpu_soft; flashbase:0; flashsize:262144; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
+        ,(controllertypestr:'ARDUINOMICRO'; controllerunitstr:'ATMEGA32U4'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2560; eeprombase:0; eepromsize:1024)
+        ,(controllertypestr:'ARDUINONANO'; controllerunitstr:'ATMEGA328P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
+        ,(controllertypestr:'ARDUINONANOEVERY'; controllerunitstr:'ATMEGA4809'; cputype: cpu_avrxmega3; fputype:fpu_soft; flashbase:0; flashsize:49152; srambase:10240; sramsize:6144; eeprombase:5120; eepromsize:256)
+        ,(controllertypestr:'ARDUINOUNO'; controllerunitstr:'ATMEGA328P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
+        ,(controllertypestr:'ATMEGA256RFR2XPRO';controllerunitstr:'ATMEGA256RFR2';cputype:cpu_avr6;fputype:fpu_soft;flashbase:0;flashsize:262144;srambase:512;sramsize:32768;eeprombase:0;eepromsize:8192)
+        ,(controllertypestr:'ATMEGA324PBXPRO'; controllerunitstr:'ATMEGA324PB'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
+        ,(controllertypestr:'ATMEGA1284PXPLAINED'; controllerunitstr:'ATMEGA1284P'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:16384; eeprombase:0; eepromsize:4096)
+        ,(controllertypestr:'ATMEGA4809XPRO'; controllerunitstr:'ATMEGA4809'; cputype: cpu_avrxmega3; fputype:fpu_soft; flashbase:0; flashsize:49152; srambase:10240; sramsize:6144; eeprombase:5120; eepromsize:256)
+        ,(controllertypestr:'ATTINY817XPRO'; controllerunitstr:'ATTINY817'; cputype: cpu_avrxmega3; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:15872; sramsize:512; eeprombase:5120; eepromsize:128)
+        ,(controllertypestr:'ATTINY3217XPRO'; controllerunitstr:'ATTINY3217'; cputype: cpu_avrxmega3; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:14336; sramsize:2048; eeprombase:5120; eepromsize:256)
    );
 
    { Supported optimizations, only used for information }
@@ -434,12 +588,12 @@ Const
                                  genericlevel3optimizerswitches-
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
-                                 [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,
+                                 [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_loopunroll,cs_opt_tailrecursion,
                                   cs_opt_stackframe,cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
-     [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion];
+     [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_stackframe,cs_opt_tailrecursion];
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
 
@@ -453,22 +607,26 @@ Const
        CPUAVR_HAS_ELPM,
        CPUAVR_HAS_ELPMX,
        CPUAVR_2_BYTE_PC,
-       CPUAVR_3_BYTE_PC
+       CPUAVR_3_BYTE_PC,
+       CPUAVR_16_REGS,
+       CPUAVR_NOMEMMAPPED_REGS
       );
 
  const
    cpu_capabilities : array[tcputype] of set of tcpuflags =
-     ( { cpu_none  } [],
-       { cpu_avr1  } [CPUAVR_2_BYTE_PC],
-       { cpu_avr2  } [CPUAVR_2_BYTE_PC],
-       { cpu_avr25 } [CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_2_BYTE_PC],
-       { cpu_avr3  } [CPUAVR_HAS_JMP_CALL,CPUAVR_2_BYTE_PC],
-       { cpu_avr31 } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_2_BYTE_PC],
-       { cpu_avr35 } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_2_BYTE_PC],
-       { cpu_avr4  } [CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_2_BYTE_PC],
-       { cpu_avr5  } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_2_BYTE_PC],
-       { cpu_avr51 } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_HAS_ELPMX,CPUAVR_2_BYTE_PC],
-       { cpu_avr6  } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_HAS_ELPMX,CPUAVR_3_BYTE_PC]
+     ( { cpu_none      } [],
+       { cpu_avrtiny   } [CPUAVR_16_REGS,CPUAVR_2_BYTE_PC,CPUAVR_NOMEMMAPPED_REGS],
+       { cpu_avr1      } [CPUAVR_2_BYTE_PC],
+       { cpu_avr2      } [CPUAVR_2_BYTE_PC], { no MOVW/LPMX here; both first appear with cpu_avr25 }
+       { cpu_avr25     } [CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_2_BYTE_PC],
+       { cpu_avr3      } [CPUAVR_HAS_JMP_CALL,CPUAVR_2_BYTE_PC],
+       { cpu_avr31     } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_2_BYTE_PC],
+       { cpu_avr35     } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_2_BYTE_PC],
+       { cpu_avr4      } [CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_2_BYTE_PC],
+       { cpu_avr5      } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_2_BYTE_PC],
+       { cpu_avr51     } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_HAS_ELPMX,CPUAVR_2_BYTE_PC],
+       { cpu_avr6      } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_HAS_ELPMX,CPUAVR_3_BYTE_PC],
+       { cpu_avrxmega3 } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_2_BYTE_PC,CPUAVR_NOMEMMAPPED_REGS]
      );
 
 Implementation

+ 22 - 16
compiler/avr/cpupara.pas

@@ -57,7 +57,10 @@ unit cpupara;
 
     function tcpuparamanager.get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;
       begin
-        result:=VOLATILE_INTREGISTERS;
+        { Returns the set of volatile integer registers; calloption is not
+          consulted. On cpu types that carry the CPUAVR_16_REGS capability,
+          R18 and R19 are taken out of the generic set. }
+        if CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype] then
+          result:=VOLATILE_INTREGISTERS-[RS_R18,RS_R19]
+        else
+          result:=VOLATILE_INTREGISTERS;
       end;
 
 
@@ -167,7 +170,7 @@ unit cpupara;
             result:=not(def.size in [1,2,4]);
           }
           else
-            if (def.size > 8) then
+            if (def.size > 8) or ((CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype]) and (def.size > 4)) then
               result:=true
             else
               result:=inherited ret_in_param(def,pd);
@@ -204,7 +207,8 @@ unit cpupara;
         begin
           { In case of po_delphi_nested_cc, the parent frame pointer
             is always passed on the stack. }
-           if (nextintreg>RS_R9) and
+           if (((nextintreg>RS_R9) and not(CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype])) or
+               (nextintreg>RS_R21)) and
               (not(vo_is_parentfp in hp.varoptions) or
                not(po_delphi_nested_cc in p.procoptions)) then
              begin
@@ -303,7 +307,8 @@ unit cpupara;
                    by adding paralen mod 2, make the size even
                  }
                  nextintreg:=curintreg-(paralen+(paralen mod 2))+1;
-                 if nextintreg>=RS_R8 then
+                 if ((nextintreg>=RS_R8) and not(CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype])) or
+                   (nextintreg>=RS_R20) then
                    curintreg:=nextintreg-1
                  else
                    begin
@@ -338,7 +343,8 @@ unit cpupara;
                  case loc of
                     LOC_REGISTER:
                       begin
-                        if nextintreg>=RS_R8 then
+                        if ((nextintreg>=RS_R8) and not(CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype])) or
+                          (nextintreg>=RS_R20) then
                           begin
                             paraloc^.loc:=LOC_REGISTER;
                             paraloc^.size:=OS_8;
@@ -355,19 +361,19 @@ unit cpupara;
                       begin
                         if push_addr_param(hp.varspez,paradef,p.proccalloption) then
                           begin
-                            paraloc^.size:=OS_ADDR;
-                            paraloc^.def:=cpointerdef.getreusable_no_free(paradef);
-                            assignintreg
-                          end
+                           paraloc^.size:=OS_ADDR;
+                           paraloc^.def:=cpointerdef.getreusable_no_free(paradef);
+                           assignintreg;
+                         end
                         else
                           begin
-                             paraloc^.def:=hp.vardef;
-                             paraloc^.loc:=LOC_REFERENCE;
-                             paraloc^.reference.index:=NR_STACK_POINTER_REG;
-                             paraloc^.reference.offset:=stack_offset;
-                             inc(stack_offset,hp.vardef.size);
-                          end;
-                        dec(paralen,hp.vardef.size);
+                            paraloc^.def:=hp.vardef;
+                            paraloc^.loc:=LOC_REFERENCE;
+                            paraloc^.reference.index:=NR_STACK_POINTER_REG;
+                            paraloc^.reference.offset:=stack_offset;
+                            inc(stack_offset,paralen);
+                         end;
+                        paralen:=0;
                       end;
                     else
                       internalerror(2002071002);

+ 3 - 2
compiler/avr/cpupi.pas

@@ -44,7 +44,7 @@ unit cpupi;
   implementation
 
     uses
-       globals,systems,
+       globals,systems,verbose,
        cpubase,
        aasmtai,aasmdata,
        tgobj,
@@ -80,7 +80,8 @@ unit cpupi;
       begin
         { because of the limited branch distance of cond. branches, they must be replaced
           sometimes by normal jmps and an inverse branch }
-        finalizeavrcode(aktproccode);
+        if not(finalizeavrcode(aktproccode)) then
+          message1(cg_w_cannot_compile_subroutine,procdef.fullprocname(false));
       end;
 
 begin

+ 4 - 22
compiler/avr/itcpugas.pas

@@ -53,39 +53,21 @@ interface
 implementation
 
     uses
-      cutils,verbose;
+      cutils,verbose,rgbase;
 
     const
-      gas_regname_table : array[tregisterindex] of string[7] = (
+      gas_regname_table : TRegNameTable = (
         {$i ravrstd.inc}
       );
 
-      gas_regname_index : array[tregisterindex] of tregisterindex = (
+      gas_regname_index : TRegisterIndexTable = (
         {$i ravrsri.inc}
       );
 
-    function findreg_by_gasname(const s:string):tregisterindex;
-      var
-        i,p : tregisterindex;
-      begin
-        {Binary search.}
-        p:=0;
-        i:=regnumber_count_bsstart;
-        repeat
-          if (p+i<=high(tregisterindex)) and (gas_regname_table[gas_regname_index[p+i]]<=s) then
-            p:=p+i;
-          i:=i shr 1;
-        until i=0;
-        if gas_regname_table[gas_regname_index[p]]=s then
-          findreg_by_gasname:=gas_regname_index[p]
-        else
-          findreg_by_gasname:=0;
-      end;
-
 
     function gas_regnum_search(const s:string):Tregister;
       begin
-        result:=regnumber_table[findreg_by_gasname(s)];
+        { Looks the GAS register name s up with findreg_by_name_table, using
+          gas_regname_table and its index table, and returns the entry of
+          regnumber_table at the index found. }
+        result:=regnumber_table[findreg_by_name_table(s,gas_regname_table,gas_regname_index)];
       end;
 
 

+ 11 - 6
compiler/avr/navradd.pas

@@ -217,7 +217,7 @@ interface
                   tmpreg1:=cg.GetNextReg(tmpreg1);
               end;
 
-            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,tmpreg1,NR_R1));
+            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,tmpreg1,GetDefaultZeroReg));
 
             location_reset(location,LOC_FLAGS,OS_NO);
             location.resflags:=getresflags(unsigned);
@@ -229,13 +229,18 @@ interface
           begin
             { decrease register pressure on registers >= r16 }
             if (right.location.value and $ff)=0 then
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,left.location.register,NR_R1))
+              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,left.location.register,GetDefaultZeroReg))
             else
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CPI,left.location.register,right.location.value and $ff))
+              begin
+                cg.getcpuregister(current_asmdata.CurrAsmList,NR_R26);
+                current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_LDI,NR_R26,right.location.value and $ff));
+                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,left.location.register,NR_R26));
+                cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_R26);
+              end;
           end
         { on the left side, we allow only a constant if it is 0 }
         else if (left.location.loc=LOC_CONSTANT) and (left.location.value=0) then
-          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,NR_R1,right.location.register))
+          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,GetDefaultZeroReg,right.location.register))
         else
           current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,left.location.register,right.location.register));
 
@@ -262,7 +267,7 @@ interface
               begin
                 { just use R1? }
                 if ((right.location.value64 shr ((i-1)*8)) and $ff)=0 then
-                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,tmpreg1,NR_R1))
+                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,tmpreg1,GetDefaultZeroReg))
                 else
                   begin
                     tmpreg2:=cg.getintregister(current_asmdata.CurrAsmList,OS_8);
@@ -272,7 +277,7 @@ interface
               end
             { above it is checked, if left=0, then a constant is allowed }
             else if (left.location.loc=LOC_CONSTANT) and (left.location.value=0) then
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,NR_R1,tmpreg2))
+              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,GetDefaultZeroReg,tmpreg2))
             else
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,tmpreg1,tmpreg2));
           end;

+ 47 - 6
compiler/avr/navrmat.pas

@@ -34,7 +34,9 @@ interface
       end;
 
       tavrshlshrnode = class(tcgshlshrnode)
+        function pass_1: tnode;override;
         procedure second_integer;override;
+        procedure second_64bit;override;
       end;
 
 implementation
@@ -46,7 +48,7 @@ implementation
       aasmbase,aasmcpu,aasmtai,aasmdata,
       defutil,
       cgbase,cgobj,hlcgobj,cgutils,
-      pass_2,procinfo,
+      pass_1,pass_2,procinfo,
       ncon,
       cpubase,
       ncgutil,cgcpu;
@@ -108,7 +110,7 @@ implementation
                  LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE :
                    begin
                      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
-                     current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CPI,left.location.register,0));
+                     current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,GetDefaultZeroReg,left.location.register));
 
                      tmpreg:=left.location.register;
                      for i:=2 to tcgsize2size[left.location.size] do
@@ -117,7 +119,7 @@ implementation
                            tmpreg:=left.location.registerhi
                          else
                            tmpreg:=cg.GetNextReg(tmpreg);
-                         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,NR_R1,tmpreg));
+                         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,GetDefaultZeroReg,tmpreg));
                        end;
                      location_reset(location,LOC_FLAGS,OS_NO);
                      location.resflags:=F_EQ;
@@ -129,6 +131,28 @@ implementation
       end;
 
 
+{*****************************************************************************
+                             TAVRSHLSHRNODE
+*****************************************************************************}
+
+    function tavrshlshrnode.pass_1 : tnode;
+      begin
+        { Only 64 bit shifts by a constant count are handled directly by the
+          avr code generator; every other shift is left to the inherited
+          first pass. }
+        if not(is_constintnode(right) and is_64bit(resultdef)) then
+          begin
+            result:=inherited pass_1;
+            exit;
+          end;
+
+        result:=nil;
+        firstpass(left);
+        firstpass(right);
+        { the expected location is only set when both operands passed cleanly }
+        if not codegenerror then
+          expectloc:=LOC_REGISTER;
+      end;
+
+
     procedure tavrshlshrnode.second_integer;
       var
          op : topcg;
@@ -152,7 +176,13 @@ implementation
           (left.location.size<>opsize) then
           hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,true);
         location_reset(location,LOC_REGISTER,opsize);
-        location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
+        if is_64bit(resultdef) then
+          begin
+            location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
+            location.registerhi:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
+          end
+        else
+          location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
 
         { shifting by a constant directly coded: }
         if (right.nodetype=ordconstn) then
@@ -164,8 +194,12 @@ implementation
                shiftval:=tordconstnode(right).value.uvalue and 31
              else
                shiftval:=tordconstnode(right).value.uvalue and 63;
-             hlcg.a_op_const_reg_reg(current_asmdata.CurrAsmList,op,opdef,
-               shiftval,left.location.register,location.register);
+             if is_64bit(resultdef) then
+               cg64.a_op64_const_reg_reg(current_asmdata.CurrAsmList,op,location.size,
+                 shiftval,left.location.register64,location.register64)
+             else
+               hlcg.a_op_const_reg_reg(current_asmdata.CurrAsmList,op,opdef,
+                 shiftval,left.location.register,location.register);
           end
         else
           begin
@@ -186,6 +220,13 @@ implementation
           end;
       end;
 
+
+    procedure tavrshlshrnode.second_64bit;
+      begin
+        { 64 bit shifts are routed through second_integer, which has its own
+          is_64bit(resultdef) branches; the inherited 64 bit implementation
+          is deliberately not called (kept below as a commented-out line). }
+        second_integer;
+        // inherited second_64bit;
+      end;
+
 begin
   cnotnode:=tavrnotnode;
   cshlshrnode:=tavrshlshrnode;

+ 6 - 4
compiler/avr/rgcpu.pas

@@ -49,8 +49,10 @@ unit rgcpu;
 
     uses
       verbose, cutils,
+      globals,
       cgobj,
-      procinfo;
+      procinfo,
+      cpuinfo;
 
 
     procedure trgcpu.add_constraints(reg:tregister);
@@ -95,7 +97,7 @@ unit rgcpu;
         helplist : TAsmList;
         hreg     : tregister;
       begin
-        if abs(spilltemp.offset)>63 then
+        if (abs(spilltemp.offset)>63) or (CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype]) then
           begin
             helplist:=TAsmList.create;
 
@@ -121,7 +123,7 @@ unit rgcpu;
         helplist : TAsmList;
         hreg     : tregister;
       begin
-        if abs(spilltemp.offset)>63 then
+        if (abs(spilltemp.offset)>63) or (CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype]) then
           begin
             helplist:=TAsmList.create;
 
@@ -187,7 +189,7 @@ unit rgcpu;
         b : byte;
       begin
         result:=false;
-        if not(spilltemp.offset in [0..63]) then
+        if not(spilltemp.offset in [0..63]) or (CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype]) then
           exit;
 
         { Replace 'mov  dst,orgreg' with 'ldd  dst,spilltemp'

+ 45 - 0
compiler/avr/tripletcpu.pas

@@ -0,0 +1,45 @@
+{
+    Copyright (c) 2020 by Jonas Maebe
+
+    Construct the cpu part of the triplet
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit tripletcpu;
+
+{$i fpcdefs.inc}
+
+interface
+
+uses
+  globtype;
+
+function tripletcpustr(tripletstyle: ttripletstyle): ansistring;
+
+implementation
+
+uses
+  globals, cpuinfo;
+
+function tripletcpustr(tripletstyle: ttripletstyle): ansistring;
+  begin
+    { The cpu part of the triplet is the fixed string 'avr'; tripletstyle
+      is accepted but not consulted. }
+    result:='avr';
+  end;
+
+
+end.
+

+ 3 - 3
compiler/blockutl.pas

@@ -174,7 +174,7 @@ implementation
       { find the type of the descriptor structure }
       descriptordef:=search_named_unit_globaltype('BLOCKRTL','FPC_BLOCK_DESCRIPTOR_SIMPLE',true).typedef;
       { create new static variable }
-      descriptor:=cstaticvarsym.create(name,vs_value,descriptordef,[],true);
+      descriptor:=cstaticvarsym.create(name,vs_value,descriptordef,[]);
       symtablestack.top.insert(descriptor);
       include(descriptor.symoptions,sp_internal);
       { create typed constant for the descriptor }
@@ -227,7 +227,7 @@ implementation
         begin
           { alias for the type to invoke the procvar, used in the symcreat
             handling of tsk_block_invoke_procvar }
-          result.localst.insert(ctypesym.create('__FPC_BLOCK_INVOKE_PV_TYPE',orgpv,true));
+          result.localst.insert(ctypesym.create('__FPC_BLOCK_INVOKE_PV_TYPE',orgpv));
           result.synthetickind:=tsk_block_invoke_procvar;
         end;
     end;
@@ -253,7 +253,7 @@ implementation
       result:=cstaticvarsym.create(
         '$'+literalname,
         vs_value,
-        blockliteraldef,[],true);
+        blockliteraldef,[]);
       include(result.symoptions,sp_internal);
       symtablestack.top.insert(result);
       { initialise it }

+ 3 - 8
compiler/browcol.pas

@@ -1560,15 +1560,10 @@ end;
                    Symbol^.Flags:=(Symbol^.Flags or sfPointer);
                    Symbol^.RelatedTypeID:=Ptrint(tpointerdef(vardef).pointeddef);
                  end;
-               if typ=fieldvarsym then
-                 MemInfo.Addr:=tfieldvarsym(sym).fieldoffset
+               if tabstractnormalvarsym(sym).localloc.loc=LOC_REFERENCE then
+                 MemInfo.Addr:=tabstractnormalvarsym(sym).localloc.reference.offset
                else
-                 begin
-                   if tabstractnormalvarsym(sym).localloc.loc=LOC_REFERENCE then
-                     MemInfo.Addr:=tabstractnormalvarsym(sym).localloc.reference.offset
-                   else
-                     MemInfo.Addr:=0;
-                 end;
+                 MemInfo.Addr:=0;
                if assigned(vardef) and (vardef.typ=arraydef) then
                  begin
                    if tarraydef(vardef).highrange<tarraydef(vardef).lowrange then

+ 1 - 2
compiler/ccharset.pas

@@ -25,7 +25,6 @@ unit ccharset;
        tunicodestring = ^tunicodechar;
 
        tcsconvert = class
-         // !!!!!!1constructor create;
        end;
 
        tunicodecharmappingflag = (umf_noinfo,umf_leadbyte,umf_undefined,
@@ -205,7 +204,7 @@ unit ccharset;
               hp:=hp^.next;
            end;
          getmap:=nil;
-      end;////////
+      end;
 
     function getmap(cp : word) : punicodemap;
 

+ 46 - 4
compiler/cclasses.pas

@@ -2190,7 +2190,7 @@ end;
         while assigned(NewNode) do
          begin
            Next:=NewNode.Next;
-           prefetch(next.next);
+           prefetch(pointer(Next)^);
            NewNode.Free;
            NewNode:=Next;
           end;
@@ -2798,9 +2798,51 @@ end;
 
 
     function tdynamicarray.equal(other:tdynamicarray):boolean;
-      begin
-        result:=false;
-        { TODO }
+      var
+        ofsthis,
+        ofsother,
+        remthis,
+        remother,
+        len : sizeint;
+        blockthis,
+        blockother : pdynamicblock;
+      begin
+        { Compares the stored bytes of this array with those of other.
+          An unassigned other or a different size is never equal. }
+        if not assigned(other) then
+          exit(false);
+        if size<>other.size then
+          exit(false);
+        blockthis:=Firstblock;
+        blockother:=other.FirstBlock;
+        ofsthis:=0;
+        ofsother:=0;
+
+        { Walk both block chains in lockstep. The two chains need not be
+          split at the same positions, so each step compares only as many
+          bytes as are left in the shorter of the two current blocks. }
+        while assigned(blockthis) and assigned(blockother) do
+          begin
+            remthis:=blockthis^.used-ofsthis;
+            remother:=blockother^.used-ofsother;
+            len:=min(remthis,remother);
+            if not CompareMem(@blockthis^.data[ofsthis],@blockother^.data[ofsother],len) then
+              exit(false);
+            inc(ofsthis,len);
+            inc(ofsother,len);
+            { move on to the next block once all used bytes of the current
+              one have been compared }
+            if ofsthis=blockthis^.used then
+              begin
+                blockthis:=blockthis^.next;
+                ofsthis:=0;
+              end;
+            if ofsother=blockother^.used then
+              begin
+                blockother:=blockother^.next;
+                ofsother:=0;
+              end;
+          end;
+
+        { If one chain still has a block left, the arrays are equal only
+          when that block holds no used bytes. }
+        if assigned(blockthis) and not assigned(blockother) then
+          result:=blockthis^.used=0
+        else if assigned(blockother) and not assigned(blockthis) then
+          result:=blockother^.used=0
+        else
+          result:=true;
       end;
 
 

+ 30 - 0
compiler/cepiktimer.pas

@@ -0,0 +1,30 @@
+{
+    Copyright (c) 2018 by Florian Klaempfl
+
+    Basic infrastructure for measuring timings of different compilation steps
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+
+{$macro on}
+{ "fix" the unit name }
+{$define epiktimer:=cepiktimer}
+{ do not depend on the classes unit }
+{$DEFINE NOCLASSES}
+{ include the original file }
+{$i ../../epiktimer/epiktimer.pas}
+

+ 52 - 13
compiler/cfidwarf.pas

@@ -63,7 +63,14 @@ interface
       end;
 
       TDwarfAsmCFI=class(TAsmCFI)
-        use_eh_frame : boolean;
+      public type
+        { Kind of call frame data to emit. The constructor picks
+          dt_eh_frame when the target has tf_use_psabieh, dt_debug when
+          cs_debuginfo is among the module switches, and dt_none otherwise. }
+        TDataType = (
+          dt_none,
+          dt_debug,
+          dt_eh_frame
+        );
+      public
+        { selected kind of call frame data, set in create }
+        datatype : TDataType;
         constructor create;override;
       end;
 
@@ -115,7 +122,7 @@ interface
 implementation
 
     uses
-      systems,
+      systems,globals,
       cutils,
       verbose,
       dwarfbase;
@@ -230,7 +237,11 @@ implementation
       begin
         inherited;
         if tf_use_psabieh in target_info.flags then
-          use_eh_frame:=true;
+          datatype:=dt_eh_frame
+        else if cs_debuginfo in current_settings.moduleswitches then
+          datatype:=dt_debug
+        else
+          datatype:=dt_none;
       end;
 
 
@@ -257,7 +268,7 @@ implementation
       end;
 
 
-{$ifdef i386}
+{$if defined(i386)}
     { if more cpu dependent stuff is implemented, this needs more refactoring }
     procedure TDwarfAsmCFILowLevel.generate_initial_instructions(list:TAsmList);
       begin
@@ -268,7 +279,17 @@ implementation
         list.concat(tai_const.create_uleb128bit(dwarf_reg(NR_RETURN_ADDRESS_REG)));
         list.concat(tai_const.create_uleb128bit((-sizeof(aint)) div data_alignment_factor));
       end;
-{$else i386}
+{$elseif defined(avr)}
+    procedure TDwarfAsmCFILowLevel.generate_initial_instructions(list:TAsmList); { AVR variant: appends two initial call frame instructions to list }
+      begin
+        list.concat(tai_const.create_8bit(DW_CFA_def_cfa)); { DW_CFA_def_cfa is followed by two ULEB128 operands: register, offset }
+        list.concat(tai_const.create_uleb128bit(32)); { register operand; NOTE(review): presumably the DWARF number of the AVR stack pointer - confirm }
+        list.concat(tai_const.create_uleb128bit(2)); { offset operand }
+        list.concat(tai_const.create_8bit(DW_CFA_offset_extended)); { DW_CFA_offset_extended is followed by: register, factored offset }
+        list.concat(tai_const.create_uleb128bit(36)); { register operand; NOTE(review): presumably the return address column - confirm }
+        list.concat(tai_const.create_uleb128bit((-1) div data_alignment_factor)); { offset -1 expressed in units of data_alignment_factor }
+      end;
+{$else}
     { if more cpu dependent stuff is implemented, this needs more refactoring }
     procedure TDwarfAsmCFILowLevel.generate_initial_instructions(list:TAsmList);
       begin
@@ -294,10 +315,14 @@ implementation
         tc             : tai_const;
       begin
         CurrentLSDALabel:=nil;
-        if use_eh_frame then
-          new_section(list,sec_eh_frame,'',0)
-        else
-          new_section(list,sec_debug_frame,'',0);
+        case datatype of
+          dt_none:
+            exit;
+          dt_debug:
+            new_section(list,sec_debug_frame,'',0);
+          dt_eh_frame:
+            new_section(list,sec_eh_frame,'',0);
+        end;
         { debug_frame:
             CIE
              DWORD   length
@@ -328,7 +353,7 @@ implementation
         current_asmdata.getlabel(lenendlabel,alt_dbgframe);
         list.concat(tai_const.create_rel_sym(aitconst_32bit,lenstartlabel,lenendlabel));
         list.concat(tai_label.create(lenstartlabel));
-        if use_eh_frame then
+        if datatype=dt_eh_frame then
           begin
             list.concat(tai_const.create_32bit(0));
             list.concat(tai_const.create_8bit(1));
@@ -348,7 +373,7 @@ implementation
         list.concat(tai_const.create_sleb128bit(data_alignment_factor));
         list.concat(tai_const.create_8bit(dwarf_reg(NR_RETURN_ADDRESS_REG)));
         { augmentation data }
-        if use_eh_frame then
+        if datatype=dt_eh_frame then
           begin
             current_asmdata.getlabel(augstartlabel,alt_dbgframe);
             current_asmdata.getlabel(augendlabel,alt_dbgframe);
@@ -401,7 +426,7 @@ implementation
                   }
                   list.concat(tai_const.create_rel_sym(aitconst_32bit,lenstartlabel,lenendlabel));
                   list.concat(tai_label.create(lenstartlabel));
-                  if use_eh_frame then
+                  if datatype=dt_eh_frame then
                     begin
                       { relative offset to the CIE }
                       current_asmdata.getlabel(fdeofslabel,alt_dbgframe);
@@ -423,7 +448,7 @@ implementation
                   list.concat(tai_const.create_rel_sym(aitconst_ptr,hp.oper[0].beginsym,hp.oper[0].endsym));
 
                   { we wrote a 'z' into the CIE augmentation data }
-                  if use_eh_frame then
+                  if datatype=dt_eh_frame then
                     begin
                       { size of augmentation }
                       list.concat(tai_const.create_8bit(sizeof(pint)));
@@ -460,6 +485,8 @@ implementation
 
     procedure TDwarfAsmCFILowLevel.start_frame(list:TAsmList);
       begin
+        if datatype=dt_none then
+          exit;
         current_asmdata.getlabel(FFrameEndLabel,alt_dbgframe);
         FLastloclabel:=get_frame_start;
         list.concat(tai_label.create(get_frame_start));
@@ -483,6 +510,8 @@ implementation
 
     procedure TDwarfAsmCFILowLevel.outmost_frame(list: TAsmList);
       begin
+        if datatype=dt_none then
+          exit;
         cfa_advance_loc(list);
         DwarfList.concat(tdwarfitem.create_reg(DW_CFA_undefined,doe_uleb,NR_RETURN_ADDRESS_REG));
       end;
@@ -490,6 +519,8 @@ implementation
 
     procedure TDwarfAsmCFILowLevel.end_frame(list:TAsmList);
       begin
+        if datatype=dt_none then
+          exit;
         if not assigned(FFrameStartLabel) then
           internalerror(2004041213);
         DwarfList.concat(tdwarfitem.create(DW_CFA_end_frame));
@@ -515,6 +546,8 @@ implementation
 
     procedure TDwarfAsmCFILowLevel.cfa_offset(list:TAsmList;reg:tregister;ofs:longint);
       begin
+        if datatype=dt_none then
+          exit;
         cfa_advance_loc(list);
 { TODO: check if ref is a temp}
         { offset must be positive }
@@ -524,6 +557,8 @@ implementation
 
     procedure TDwarfAsmCFILowLevel.cfa_restore(list:TAsmList;reg:tregister);
       begin
+        if datatype=dt_none then
+          exit;
         cfa_advance_loc(list);
         DwarfList.concat(tdwarfitem.create_reg(DW_CFA_restore_extended,doe_uleb,reg));
       end;
@@ -531,6 +566,8 @@ implementation
 
     procedure TDwarfAsmCFILowLevel.cfa_def_cfa_register(list:TAsmList;reg:tregister);
       begin
+        if datatype=dt_none then
+          exit;
         cfa_advance_loc(list);
         DwarfList.concat(tdwarfitem.create_reg(DW_CFA_def_cfa_register,doe_uleb,reg));
       end;
@@ -538,6 +575,8 @@ implementation
 
     procedure TDwarfAsmCFILowLevel.cfa_def_cfa_offset(list:TAsmList;ofs:longint);
       begin
+        if datatype=dt_none then
+          exit;
         cfa_advance_loc(list);
         DwarfList.concat(tdwarfitem.create_const(DW_CFA_def_cfa_offset,doe_uleb,ofs));
       end;

+ 10 - 7
compiler/cfileutl.pas

@@ -99,7 +99,7 @@ interface
 
       TSearchPathList = class(TCmdStrList)
         procedure AddPath(s:TCmdStr;addfirst:boolean);overload;
-        procedure AddPath(SrcPath,s:TCmdStr;addfirst:boolean);overload;
+        procedure AddLibraryPath(const sysroot: TCmdStr; s:TCmdStr;addfirst:boolean);overload;
         procedure AddList(list:TSearchPathList;addfirst:boolean);
         function  FindFile(const f : TCmdStr;allowcache:boolean;var foundfile:TCmdStr):boolean;
       end;
@@ -526,7 +526,7 @@ end;
    function CurDirRelPath(systeminfo: tsysteminfo): TCmdStr;
 
    begin
-     if systeminfo.system <> system_powerpc_macos then
+     if systeminfo.system <> system_powerpc_macosclassic then
        CurDirRelPath:= '.'+systeminfo.DirSep
      else
        CurDirRelPath:= ':'
@@ -877,7 +877,7 @@ end;
      var
        i      : longint;
      begin
-       if source_info.system = system_powerpc_MACOS then
+       if source_info.system = system_powerpc_macosclassic then
          FixFileName:= TranslatePathToMac(s, true)
        else
         if (tf_files_case_aware in source_info.flags) or
@@ -940,7 +940,7 @@ end;
      var
        i : longint;
      begin
-       if target_info.system = system_powerpc_MACOS then
+       if target_info.system = system_powerpc_macosclassic then
          TargetFixFileName:= TranslatePathToMac(s, true)
        else
         if (tf_files_case_aware in target_info.flags) or
@@ -995,11 +995,11 @@ end;
 
     procedure TSearchPathList.AddPath(s:TCmdStr;addfirst:boolean);
       begin
-        AddPath('',s,AddFirst);
+        AddLibraryPath('',s,AddFirst);
       end;
 
 
-   procedure TSearchPathList.AddPath(SrcPath,s:TCmdStr;addfirst:boolean);
+   procedure TSearchPathList.AddLibraryPath(const sysroot: TCmdStr; s:TCmdStr;addfirst:boolean);
      var
        staridx,
        i,j      : longint;
@@ -1074,7 +1074,10 @@ end;
 
          { fix pathname }
          DePascalQuote(currPath);
-         currPath:=SrcPath+FixPath(currPath,false);
+         { GNU LD convention: if library search path starts with '=', it's relative to the
+           sysroot; otherwise, interpret it as a regular path }
+         if (length(currPath) >0) and (currPath[1]='=') then
+           currPath:=sysroot+FixPath(copy(currPath,2,length(currPath)-1),false);
          if currPath='' then
            currPath:= CurDirRelPath(source_info)
          else

+ 60 - 5
compiler/cg64f32.pas

@@ -72,6 +72,7 @@ unit cg64f32;
         procedure a_op64_reg_ref(list : TAsmList;op:TOpCG;size : tcgsize;reg : tregister64; const ref: treference);override;
         procedure a_op64_const_loc(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;const l: tlocation);override;
         procedure a_op64_reg_loc(list : TAsmList;op:TOpCG;size : tcgsize;reg : tregister64;const l : tlocation);override;
+        procedure a_op64_ref_loc(list: TAsmList; op: TOpCG; size: tcgsize;const ref: treference; const l: tlocation);override;
         procedure a_op64_loc_reg(list : TAsmList;op:TOpCG;size : tcgsize;const l : tlocation;reg : tregister64);override;
         procedure a_op64_const_ref(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;const ref : treference);override;
 
@@ -324,8 +325,23 @@ unit cg64f32;
             reg.reglo:=reg.reghi;
             reg.reghi:=tmpreg;
           end;
-        cg.a_load_reg_ref(list,OS_32,OS_32,reg.reglo,ref);
         tmpref := ref;
+{$if defined(cpu8bitalu) or defined(cpu16bitalu)}
+        { Preload base and index to a separate temp register for 8 & 16 bit CPUs
+          to reduce spilling and produce better code. }
+        if (tmpref.base<>NR_NO) and (getsupreg(tmpref.base)>=first_int_imreg) then
+          begin
+            tmpreg:=cg.getaddressregister(list);
+            cg.a_load_reg_reg(list,OS_ADDR,OS_ADDR,tmpref.base,tmpreg);
+            tmpref.base:=tmpreg;
+            if tmpref.index<>NR_NO then
+              begin
+                cg.a_op_reg_reg(list,OP_ADD,OS_ADDR,tmpref.index,tmpref.base);
+                tmpref.index:=NR_NO;
+              end;
+          end;
+{$endif}
+        cg.a_load_reg_ref(list,OS_32,OS_32,reg.reglo,tmpref);
         inc(tmpref.offset,4);
         cg.a_load_reg_ref(list,OS_32,OS_32,reg.reghi,tmpref);
       end;
@@ -356,6 +372,21 @@ unit cg64f32;
             reg.reghi := tmpreg;
           end;
         tmpref := ref;
+{$if defined(cpu8bitalu) or defined(cpu16bitalu)}
+        { Preload base and index to a separate temp register for 8 & 16 bit CPUs
+          to reduce spilling and produce better code. }
+        if (tmpref.base<>NR_NO) and (getsupreg(tmpref.base)>=first_int_imreg) then
+          begin
+            tmpreg:=cg.getaddressregister(list);
+            cg.a_load_reg_reg(list,OS_ADDR,OS_ADDR,tmpref.base,tmpreg);
+            tmpref.base:=tmpreg;
+            if tmpref.index<>NR_NO then
+              begin
+                cg.a_op_reg_reg(list,OP_ADD,OS_ADDR,tmpref.index,tmpref.base);
+                tmpref.index:=NR_NO;
+              end;
+          end;
+{$endif}
         if (tmpref.base=reg.reglo) then
          begin
            tmpreg:=cg.getaddressregister(list);
@@ -705,6 +736,25 @@ unit cg64f32;
       end;
 
 
+    procedure tcg64f32.a_op64_ref_loc(list : TAsmList;op:TOpCG;size : tcgsize;const ref : treference;const l : tlocation); { applies op with the 64 bit value at ref as source operand and location l as destination }
+      var
+        tempreg: tregister64;
+      begin
+        case l.loc of
+          LOC_REFERENCE, LOC_CREFERENCE:
+            begin { destination is a reference: route the source through a temporary register pair }
+              tempreg.reghi:=cg.getintregister(list,OS_32);
+              tempreg.reglo:=cg.getintregister(list,OS_32);
+              a_load64_ref_reg(list,ref,tempreg); { fetch the source operand from ref }
+              a_op64_reg_ref(list,op,size,tempreg,l.reference); { delegate the operation to the register->reference variant }
+            end;
+          LOC_REGISTER,LOC_CREGISTER:
+            a_op64_ref_reg(list,op,size,ref,l.register64); { destination is a register pair: delegate to the reference->register variant }
+          else
+            internalerror(2020042803); { every other location kind is rejected with an internal error }
+        end;
+      end;
+
 
     procedure tcg64f32.a_op64_loc_reg(list : TAsmList;op:TOpCG;size : tcgsize;const l : tlocation;reg : tregister64);
       begin
@@ -738,12 +788,17 @@ unit cg64f32;
       begin
         tempreg.reghi:=cg.getintregister(list,OS_32);
         tempreg.reglo:=cg.getintregister(list,OS_32);
-        a_load64_ref_reg(list,ref,tempreg);
         if op in [OP_NEG,OP_NOT] then
-          a_op64_reg_reg(list,op,size,tempreg,tempreg)
+          begin
+            a_op64_reg_reg(list,op,size,reg,tempreg);
+            a_load64_reg_ref(list,tempreg,ref);
+          end
         else
-          a_op64_reg_reg(list,op,size,reg,tempreg);
-        a_load64_reg_ref(list,tempreg,ref);
+          begin
+            a_load64_ref_reg(list,ref,tempreg);
+            a_op64_reg_reg(list,op,size,reg,tempreg);
+            a_load64_reg_ref(list,tempreg,ref);
+          end;
       end;
 
 

+ 104 - 46
compiler/cgbase.pas

@@ -107,6 +107,10 @@ interface
          ,addr_hi8
          ,addr_hi8_gs
          {$ENDIF}
+         {$IFDEF Z80}
+         ,addr_lo8
+         ,addr_hi8
+         {$ENDIF}
          {$IFDEF i8086}
          ,addr_dgroup      // the data segment group
          ,addr_fardataseg  // the far data segment of the current pascal module (unit or program)
@@ -125,6 +129,9 @@ interface
          {$IFDEF ARM}
          ,addr_gottpoff
          ,addr_tpoff
+         ,addr_tlsgd
+         ,addr_tlsdesc
+         ,addr_tlscall
          {$ENDIF}
          {$IFDEF i386}
          ,addr_ntpoff
@@ -188,14 +195,9 @@ interface
                   OS_S8,  OS_S16,  OS_S32,  OS_S64,  OS_S128,
                  { single, double, extended, comp, float128 }
                   OS_F32, OS_F64,  OS_F80,  OS_C64,  OS_F128,
-                 { multi-media sizes: split in byte, word, dword, ... }
-                 { entities, then the signed counterparts             }
-                  OS_M8,  OS_M16,  OS_M32,  OS_M64,  OS_M128,  OS_M256,  OS_M512,
-                  OS_MS8, OS_MS16, OS_MS32, OS_MS64, OS_MS128, OS_MS256, OS_MS512,
-                 { multi-media sizes: single-precision floating-point }
-                  OS_MF32, OS_MF128, OS_MF256, OS_MF512,
-                 { multi-media sizes: double-precision floating-point }
-                  OS_MD64, OS_MD128, OS_MD256, OS_MD512);
+                 { multi-media sizes, describes only the register size but not how it is split,
+                   this information must be passed separately }
+                  OS_M8,  OS_M16,  OS_M32,  OS_M64,  OS_M128,  OS_M256,  OS_M512);
 
       { Register types }
       TRegisterType = (
@@ -208,7 +210,15 @@ interface
         R_SPECIALREGISTER, { = 5 }
         R_ADDRESSREGISTER, { = 6 }
         { used on llvm, every temp gets its own "base register" }
-        R_TEMPREGISTER     { = 7 }
+        R_TEMPREGISTER,    { = 7 }
+        { used on llvm for tracking metadata (every unique metadata has its own base register) }
+        R_METADATAREGISTER,{ = 8 }
+        { optional MAC16 (16 bit multiply-accumulate) registers on Xtensa }
+        R_MAC16REGISTER    { = 9 }
+
+        { do not add more than 16 elements (ifdef by cpu type if needed)
+          so we can store this in one nibble and pack TRegister
+          if the supreg width should be extended }
       );
 
       { Sub registers }
@@ -230,6 +240,17 @@ interface
         R_SUBMMX,     { = 12; 128 BITS }
         R_SUBMMY,     { = 13; 256 BITS }
         R_SUBMMZ,     { = 14; 512 BITS }
+{$ifdef Z80}
+        { Subregisters for the flags register (Z80) }
+        R_SUBFLAGCARRY,          { = 15; Carry flag }
+        R_SUBFLAGADDSUBTRACT,    { = 16; Add/Subtract flag }
+        R_SUBFLAGPARITYOVERFLOW, { = 17; Parity/Overflow flag }
+        R_SUBFLAGUNUSEDBIT3,     { = 18; Unused flag (bit 3) }
+        R_SUBFLAGHALFCARRY,      { = 19; Half Carry flag }
+        R_SUBFLAGUNUSEDBIT5,     { = 20; Unused flag (bit 5) }
+        R_SUBFLAGZERO,           { = 21; Zero flag }
+        R_SUBFLAGSIGN,           { = 22; Sign flag }
+{$else Z80}
         { Subregisters for the flags register (x86) }
         R_SUBFLAGCARRY,     { = 15; Carry flag }
         R_SUBFLAGPARITY,    { = 16; Parity flag }
@@ -238,7 +259,12 @@ interface
         R_SUBFLAGSIGN,      { = 19; Sign flag }
         R_SUBFLAGOVERFLOW,  { = 20; Overflow flag }
         R_SUBFLAGINTERRUPT, { = 21; Interrupt enable flag }
-        R_SUBFLAGDIRECTION  { = 22; Direction flag }
+        R_SUBFLAGDIRECTION, { = 22; Direction flag }
+{$endif Z80}
+        R_SUBMM8B,          { = 23; for part of v regs on aarch64 }
+        R_SUBMM16B,         { = 24; for part of v regs on aarch64 }
+        { subregisters for the metadata register (llvm) }
+        R_SUBMETASTRING     { = 25 }
       );
       TSubRegisterSet = set of TSubRegister;
 
@@ -295,11 +321,12 @@ interface
         passed to an mm operation is nil, it means that the whole location is moved }
       tmmshuffle = record
         { describes how many shuffles are actually described, if len=0 then
-          moving the scalar with index 0 to the scalar with index 0 is meant }
-        len : byte;
-        { lower nibble of each entry of this array describes index of the source data index while
-          the upper nibble describes the destination index }
-        shuffles : array[1..1] of byte;
+          moving the scalar with index 0 to the scalar with index 0 is meant,
+          if len=-1, then a variable/unknown length is assumed }
+        len : Shortint;
+        { the lower byte of each entry of this array holds the source data index while
+          the upper byte holds the destination index }
+        shuffles : array[1..1] of word;
       end;
 
       Tsuperregisterarray=array[0..$ffff] of Tsuperregister;
@@ -316,6 +343,7 @@ interface
         procedure clear;
         procedure add(s:tsuperregister);
         function addnodup(s:tsuperregister): boolean;
+        { returns the last element and removes it from the list }
         function get:tsuperregister;
         function readidx(i:word):tsuperregister;
         procedure deleteidx(i:word);
@@ -339,12 +367,7 @@ interface
          { floating point values }
          4,  8, 10,  8, 16,
          { multimedia values }
-         1,  2,  4,  8, 16, 32, 64,
-         1,  2,  4,  8, 16, 32, 64,
-         { single-precision multimedia values }
-         4, 16, 32, 64,
-         { double-precision multimedia values }
-         8, 16, 32, 64);
+         1,  2,  4,  8, 16, 32, 64);
 
        tfloat2tcgsize: array[tfloattype] of tcgsize =
          (OS_F32,OS_F64,OS_F80,OS_F80,OS_C64,OS_C64,OS_F128);
@@ -384,10 +407,7 @@ interface
          OS_8,    OS_16,   OS_32,   OS_64,   OS_128,
 
          OS_F32,  OS_F64,  OS_F80,  OS_C64,  OS_F128,
-         OS_M8,   OS_M16,  OS_M32,  OS_M64,  OS_M128, OS_M256, OS_M512,
-         OS_M8,   OS_M16,  OS_M32,  OS_M64,  OS_M128, OS_M256, OS_M512,
-         OS_MF32, OS_MF128,OS_MF256,OS_MF512,
-         OS_MD64, OS_MD128,OS_MD256,OS_MD512);
+         OS_M8,   OS_M16,  OS_M32,  OS_M64,  OS_M128, OS_M256, OS_M512);
 
 
        tcgsize2signed : array[tcgsize] of tcgsize = (OS_NO,
@@ -395,10 +415,7 @@ interface
          OS_S8,   OS_S16,  OS_S32,  OS_S64,  OS_S128,
 
          OS_F32,  OS_F64,  OS_F80,  OS_C64,  OS_F128,
-         OS_MS8,  OS_MS16, OS_MS32, OS_MS64, OS_MS128,OS_MS256,OS_MS512,
-         OS_MS8,  OS_MS16, OS_MS32, OS_MS64, OS_MS128,OS_MS256,OS_MS512,
-         OS_MF32, OS_MF128,OS_MF256,OS_MF512,
-         OS_MD64, OS_MD128,OS_MD256,OS_MD512);
+         OS_M8,   OS_M16,  OS_M32,  OS_M64,  OS_M128, OS_M256,OS_M512);
 
 
        tcgloc2str : array[TCGLoc] of string[12] = (
@@ -424,7 +441,13 @@ interface
             );
 
     var
-       mms_movescalar : pmmshuffle;
+       mms_movescalar,
+       mms_variable,
+       mms_2,
+       mms_4,
+       mms_8,
+       mms_16,
+       mms_32 : pmmshuffle;
 
     procedure supregset_reset(var regs:tsuperregisterset;setall:boolean;
                               maxreg:Tsuperregister);{$ifdef USEINLINE}inline;{$endif}
@@ -469,10 +492,13 @@ interface
       the source }
     procedure removeshuffles(var shuffle : tmmshuffle);
 
+    function is_float_cgsize(size: tcgsize): boolean;{$ifdef USEINLINE}inline;{$endif}
+
 implementation
 
     uses
-      verbose;
+      verbose,
+      cutils;
 
 {******************************************************************************
                              tsuperregisterworklist
@@ -565,9 +591,8 @@ implementation
     begin
       if length=0 then
         internalerror(200310142);
-      get:=buf^[0];
-      buf^[0]:=buf^[length-1];
       dec(length);
+      get:=buf^[length];
     end;
 
 
@@ -705,6 +730,8 @@ implementation
             result:=result+'my';
           R_SUBMMZ:
             result:=result+'mz';
+          R_SUBMM8B:
+            result:=result+'m8b';
           else
             internalerror(200308252);
         end;
@@ -750,13 +777,13 @@ implementation
       begin
         case a of
           4:
-            result := OS_MF32;
+            result := OS_M32;
           16:
-            result := OS_MF128;
+            result := OS_M128;
           32:
-            result := OS_MF256;
+            result := OS_M256;
           64:
-            result := OS_MF512;
+            result := OS_M512;
           else
             result := int_cgsize(a);
         end;
@@ -766,13 +793,13 @@ implementation
       begin
         case a of
           8:
-            result := OS_MD64;
+            result := OS_M64;
           16:
-            result := OS_MD128;
+            result := OS_M128;
           32:
-            result := OS_MD256;
+            result := OS_M256;
           64:
-            result := OS_MD512;
+            result := OS_M512;
           else
             result := int_cgsize(a);
         end;
@@ -820,13 +847,13 @@ implementation
         i : longint;
       begin
         realshuffle:=true;
-        if (shuffle=nil) or (shuffle^.len=0) then
+        if (shuffle=nil) or (shuffle^.len<1) then
           realshuffle:=false
         else
           begin
             for i:=1 to shuffle^.len do
               begin
-                if (shuffle^.shuffles[i] and $f)<>((shuffle^.shuffles[i] and $f0) shr 4) then
+                if (shuffle^.shuffles[i] and $ff)<>((shuffle^.shuffles[i] and $ff00) shr 8) then
                   exit;
               end;
             realshuffle:=false;
@@ -851,9 +878,40 @@ implementation
       end;
 
 
+    function is_float_cgsize(size: tcgsize): boolean;{$ifdef USEINLINE}inline;{$endif} { true iff size lies in the enumeration range OS_F32..OS_F128 }
+      begin
+        result:=size in [OS_F32..OS_F128]; { the range spans OS_F32, OS_F64, OS_F80, OS_C64 and OS_F128, so the comp size OS_C64 is included; the OS_M* sizes are not }
+      end;
+
+
+   procedure Initmms(var p : pmmshuffle;len : ShortInt); { allocates a tmmshuffle for len entries, stores len and sets shuffles[i]:=i for i in 1..len }
+     var
+       i : Integer;
+     begin
+       Getmem(p,sizeof(tmmshuffle)+(max(len,0)-1)*2); { tmmshuffle declares one word entry itself, so only max(len,0)-1 further 2 byte entries are added; len=0 and len=-1 both yield sizeof(tmmshuffle)-2 }
+       p^.len:=len; { a non-positive len is stored as is; the loop below then runs zero times }
+       for i:=1 to len do
+{$push}
+{$R-} { shuffles is declared as array[1..1], so range checking is switched off for indices above 1 }
+         p^.shuffles[i]:=i; { entry i gets the value i, i.e. low byte = i, high byte = 0 }
+{$pop}
+     end;
+
 initialization
-  new(mms_movescalar);
-  mms_movescalar^.len:=0;
+  Initmms(mms_movescalar,0);
+  Initmms(mms_variable,-1);
+  Initmms(mms_2,2);
+  Initmms(mms_4,4);
+  Initmms(mms_8,8);
+  Initmms(mms_16,16);
+  Initmms(mms_32,32);
 finalization
-  dispose(mms_movescalar);
+  Freemem(mms_movescalar);
+  Freemem(mms_variable);
+  Freemem(mms_2);
+  Freemem(mms_4);
+  Freemem(mms_8);
+  Freemem(mms_16);
+  Freemem(mms_32);
 end.
+

+ 6 - 6
compiler/cgexcept.pas

@@ -154,7 +154,7 @@ unit cgexcept;
       begin
         current_asmdata.getjumplabel(exceptstate.exceptionlabel);
         exceptstate.oldflowcontrol:=flowcontrol;
-        exceptstate.finallycodelabel:=nil;;
+        exceptstate.finallycodelabel:=nil;
 
         paraloc1.init;
         paraloc2.init;
@@ -162,9 +162,9 @@ unit cgexcept;
 
         { fpc_pushexceptaddr(exceptionframetype, setjmp_buffer, exception_address_chain_entry) }
         pd:=search_system_proc('fpc_pushexceptaddr');
-        paramanager.getintparaloc(list,pd,1,paraloc1);
-        paramanager.getintparaloc(list,pd,2,paraloc2);
-        paramanager.getintparaloc(list,pd,3,paraloc3);
+        paramanager.getcgtempparaloc(list,pd,1,paraloc1);
+        paramanager.getcgtempparaloc(list,pd,2,paraloc2);
+        paramanager.getcgtempparaloc(list,pd,3,paraloc3);
         if pd.is_pushleftright then
           begin
             { type of exceptionframe }
@@ -197,7 +197,7 @@ unit cgexcept;
 
         { fpc_setjmp(result_of_pushexceptaddr_call) }
         pd:=search_system_proc('fpc_setjmp');
-        paramanager.getintparaloc(list,pd,1,paraloc1);
+        paramanager.getcgtempparaloc(list,pd,1,paraloc1);
 
         hlcg.a_load_reg_cgpara(list,pushexceptres.def,tmpresloc.register,paraloc1);
         paramanager.freecgpara(list,paraloc1);
@@ -302,7 +302,7 @@ unit cgexcept;
         reference_reset_symbol(href2, current_asmdata.RefAsmSymbol(excepttype.vmt_mangledname, AT_DATA, indirect), 0, sizeof(pint), []);
         if otherunit then
           current_module.add_extern_asmsym(excepttype.vmt_mangledname, AB_EXTERNAL, AT_DATA);
-        paramanager.getintparaloc(list, pd, 1, paraloc1);
+        paramanager.getcgtempparaloc(list, pd, 1, paraloc1);
         hlcg.a_loadaddr_ref_cgpara(list, excepttype.vmt_def, href2, paraloc1);
         paramanager.freecgpara(list, paraloc1);
         fpc_catches_res:=hlcg.g_call_system_proc(list, pd, [@paraloc1], nil);

+ 315 - 196
compiler/cgobj.pas

@@ -304,10 +304,6 @@ unit cgobj;
           procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister; shuffle : pmmshuffle); virtual;
 
           { basic arithmetic operations }
-          { note: for operators which require only one argument (not, neg), use }
-          { the op_reg_reg, op_reg_ref or op_reg_loc methods and keep in mind   }
-          { that in this case the *second* operand is used as both source and   }
-          { destination (JM)                                                    }
           procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); virtual; abstract;
           procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); virtual;
           procedure a_op_const_loc(list : TAsmList; Op: TOpCG; a: tcgint; const loc: tlocation);
@@ -326,6 +322,11 @@ unit cgobj;
           procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation); virtual;
           procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation); virtual;
 
+          { unary operations (not, neg) }
+          procedure a_op_reg(list : TAsmList; Op: TOpCG; size: TCGSize; reg: TRegister); virtual;
+          procedure a_op_ref(list : TAsmList; Op: TOpCG; size: TCGSize; const ref: TReference); virtual;
+          procedure a_op_loc(list : TAsmList; Op: TOpCG; const loc: tlocation);
+
           {  comparison operations }
           procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
             l : tasmlabel); virtual;
@@ -519,11 +520,15 @@ unit cgobj;
         procedure a_op64_const_ref(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;const ref : treference);virtual;abstract;
         procedure a_op64_const_loc(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;const l: tlocation);virtual;abstract;
         procedure a_op64_reg_loc(list : TAsmList;op:TOpCG;size : tcgsize;reg : tregister64;const l : tlocation);virtual;abstract;
+        procedure a_op64_ref_loc(list : TAsmList;op:TOpCG;size : tcgsize;const ref : treference;const l : tlocation);virtual;abstract;
         procedure a_op64_loc_reg(list : TAsmList;op:TOpCG;size : tcgsize;const l : tlocation;reg64 : tregister64);virtual;abstract;
         procedure a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);virtual;
         procedure a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);virtual;
         procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);virtual;
         procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);virtual;
+        procedure a_op64_reg(list : TAsmList;op:TOpCG;size : tcgsize;regdst : tregister64);virtual;
+        procedure a_op64_ref(list : TAsmList;op:TOpCG;size : tcgsize;const ref : treference);virtual;
+        procedure a_op64_loc(list : TAsmList;op:TOpCG;size : tcgsize;const l : tlocation);virtual;
 
         procedure a_op64_const_subsetref(list : TAsmList; Op : TOpCG; size : TCGSize; a : int64; const sref: tsubsetreference);
         procedure a_op64_reg_subsetref(list : TAsmList; Op : TOpCG; size : TCGSize; reg: tregister64; const sref: tsubsetreference);
@@ -578,8 +583,8 @@ unit cgobj;
 implementation
 
     uses
-       globals,systems,
-       verbose,paramgr,symsym,
+       globals,systems,fmodule,
+       verbose,paramgr,symsym,symtable,
        tgobj,cutils,procinfo;
 
 {*****************************************************************************
@@ -742,15 +747,15 @@ implementation
 {$if defined(cpu8bitalu) or defined(cpu16bitalu)}
     function tcg.GetNextReg(const r: TRegister): TRegister;
       begin
-{$ifndef AVR}
+{$ifdef AVR}
         { the AVR code generator depends on the fact that it can do GetNextReg also on physical registers }
+        if (getsupreg(r)>=first_int_imreg) and not(has_next_reg[getsupreg(r)]) then
+          internalerror(2017091103);
+{$else AVR}
         if getsupreg(r)<first_int_imreg then
           internalerror(2013051401);
         if not has_next_reg[getsupreg(r)] then
           internalerror(2017091103);
-{$else AVR}
-        if (getsupreg(r)>=first_int_imreg) and not(has_next_reg[getsupreg(r)]) then
-          internalerror(2017091103);
 {$endif AVR}
         if getregtype(r)<>R_INTREGISTER then
           internalerror(2017091101);
@@ -1022,145 +1027,151 @@ implementation
         location: pcgparalocation;
         orgsizeleft,
         sizeleft: tcgint;
+        usesize: tcgsize;
         reghasvalue: boolean;
       begin
         location:=cgpara.location;
         tmpref:=r;
         sizeleft:=cgpara.intsize;
-        while assigned(location) do
-          begin
-            paramanager.allocparaloc(list,location);
-            case location^.loc of
-              LOC_REGISTER,LOC_CREGISTER:
-                begin
-                   { Parameter locations are often allocated in multiples of
-                     entire registers. If a parameter only occupies a part of
-                     such a register (e.g. a 16 bit int on a 32 bit
-                     architecture), the size of this parameter can only be
-                     determined by looking at the "size" parameter of this
-                     method -> if the size parameter is <= sizeof(aint), then
-                     we check that there is only one parameter location and
-                     then use this "size" to load the value into the parameter
-                     location }
-                   if (size<>OS_NO) and
-                      (tcgsize2size[size]<=sizeof(aint)) then
-                     begin
-                       cgpara.check_simple_location;
-                       a_load_ref_reg(list,size,location^.size,tmpref,location^.register);
-                       if location^.shiftval<0 then
-                         a_op_const_reg(list,OP_SHL,location^.size,-location^.shiftval,location^.register);
-                     end
-                   { there's a lot more data left, and the current paraloc's
-                     register is entirely filled with part of that data }
-                   else if (sizeleft>sizeof(aint)) then
-                     begin
-                       a_load_ref_reg(list,location^.size,location^.size,tmpref,location^.register);
-                     end
-                   { we're at the end of the data, and it can be loaded into
-                     the current location's register with a single regular
-                     load }
-                   else if sizeleft in [1,2,4,8] then
-                     begin
-                       a_load_ref_reg(list,int_cgsize(sizeleft),location^.size,tmpref,location^.register);
-                       if location^.shiftval<0 then
-                         a_op_const_reg(list,OP_SHL,location^.size,-location^.shiftval,location^.register);
-                     end
-                   { we're at the end of the data, and we need multiple loads
-                     to get it in the register because it's an irregular size }
-                   else
-                     begin
-                       { should be the last part }
-                       if assigned(location^.next) then
-                         internalerror(2010052907);
-                       { load the value piecewise to get it into the register }
-                       orgsizeleft:=sizeleft;
-                       reghasvalue:=false;
+        repeat
+          paramanager.allocparaloc(list,location);
+          case location^.loc of
+            LOC_REGISTER,LOC_CREGISTER:
+              begin
+                 { Parameter locations are often allocated in multiples of
+                   entire registers. If a parameter only occupies a part of
+                   such a register (e.g. a 16 bit int on a 32 bit
+                   architecture), the size of this parameter can only be
+                   determined by looking at the "size" parameter of this
+                   method -> if the size parameter is <= sizeof(aint), then
+                   we check that there is only one parameter location and
+                   then use this "size" to load the value into the parameter
+                   location }
+                 if (size<>OS_NO) and
+                    (tcgsize2size[size]<=sizeof(aint)) then
+                   begin
+                     cgpara.check_simple_location;
+                     a_load_ref_reg(list,size,location^.size,tmpref,location^.register);
+                     if location^.shiftval<0 then
+                       a_op_const_reg(list,OP_SHL,location^.size,-location^.shiftval,location^.register);
+                   end
+                 { there's a lot more data left, and the current paraloc's
+                   register is entirely filled with part of that data }
+                 else if (sizeleft>sizeof(aint)) then
+                   begin
+                     a_load_ref_reg(list,location^.size,location^.size,tmpref,location^.register);
+                   end
+                 { we're at the end of the data, and it can be loaded into
+                   the current location's register with a single regular
+                   load }
+                 else if sizeleft in [1,2,4,8] then
+                   begin
+                     a_load_ref_reg(list,int_cgsize(sizeleft),location^.size,tmpref,location^.register);
+                     if location^.shiftval<0 then
+                       a_op_const_reg(list,OP_SHL,location^.size,-location^.shiftval,location^.register);
+                   end
+                 { we're at the end of the data, and we need multiple loads
+                   to get it in the register because it's an irregular size }
+                 else
+                   begin
+                     { should be the last part }
+                     if assigned(location^.next) then
+                       internalerror(2010052907);
+                     { load the value piecewise to get it into the register }
+                     orgsizeleft:=sizeleft;
+                     reghasvalue:=false;
 {$ifdef cpu64bitalu}
-                       if sizeleft>=4 then
-                         begin
-                           a_load_ref_reg(list,OS_32,location^.size,tmpref,location^.register);
-                           dec(sizeleft,4);
-                           if target_info.endian=endian_big then
-                             a_op_const_reg(list,OP_SHL,location^.size,sizeleft*8,location^.register);
-                           inc(tmpref.offset,4);
-                           reghasvalue:=true;
-                         end;
+                     if sizeleft>=4 then
+                       begin
+                         a_load_ref_reg(list,OS_32,location^.size,tmpref,location^.register);
+                         dec(sizeleft,4);
+                         if target_info.endian=endian_big then
+                           a_op_const_reg(list,OP_SHL,location^.size,sizeleft*8,location^.register);
+                         inc(tmpref.offset,4);
+                         reghasvalue:=true;
+                       end;
 {$endif cpu64bitalu}
-                       if sizeleft>=2 then
-                         begin
-                           tmpreg:=getintregister(list,location^.size);
-                           a_load_ref_reg(list,OS_16,location^.size,tmpref,tmpreg);
-                           dec(sizeleft,2);
-                           if reghasvalue then
-                             begin
-                               if target_info.endian=endian_big then
-                                 a_op_const_reg(list,OP_SHL,location^.size,sizeleft*8,tmpreg)
-                               else
-                                 a_op_const_reg(list,OP_SHL,location^.size,(orgsizeleft-(sizeleft+2))*8,tmpreg);
-                               a_op_reg_reg(list,OP_OR,location^.size,tmpreg,location^.register);
-                             end
-                           else
-                             begin
-                               if target_info.endian=endian_big then
-                                 a_op_const_reg_reg(list,OP_SHL,location^.size,sizeleft*8,tmpreg,location^.register)
-                               else
-                                 a_load_reg_reg(list,location^.size,location^.size,tmpreg,location^.register);
-                             end;
-                           inc(tmpref.offset,2);
-                           reghasvalue:=true;
-                         end;
-                       if sizeleft=1 then
-                         begin
-                           tmpreg:=getintregister(list,location^.size);
-                           a_load_ref_reg(list,OS_8,location^.size,tmpref,tmpreg);
-                           dec(sizeleft,1);
-                           if reghasvalue then
-                             begin
-                               if target_info.endian=endian_little then
-                                 a_op_const_reg(list,OP_SHL,location^.size,(orgsizeleft-(sizeleft+1))*8,tmpreg);
-                               a_op_reg_reg(list,OP_OR,location^.size,tmpreg,location^.register)
-                             end
-                           else
-                             a_load_reg_reg(list,location^.size,location^.size,tmpreg,location^.register);
-                           inc(tmpref.offset);
-                         end;
-                       if location^.shiftval<0 then
-                         a_op_const_reg(list,OP_SHL,location^.size,-location^.shiftval,location^.register);
-                       { the loop will already adjust the offset and sizeleft }
-                       dec(tmpref.offset,orgsizeleft);
-                       sizeleft:=orgsizeleft;
-                     end;
-                end;
-              LOC_REFERENCE,LOC_CREFERENCE:
-                begin
-                  reference_reset_base(ref,location^.reference.index,location^.reference.offset,ctempposinvalid,newalignment(cgpara.alignment,cgpara.intsize-sizeleft),[]);
-                  a_load_ref_cgparalocref(list,size,sizeleft,tmpref,ref,cgpara,location);
-                end;
-              LOC_MMREGISTER,LOC_CMMREGISTER:
-                begin
-                   case location^.size of
-                     OS_F32,
-                     OS_F64,
-                     OS_F128:
-                       a_loadmm_ref_reg(list,location^.size,location^.size,tmpref,location^.register,mms_movescalar);
-                     OS_M8..OS_M128,
-                     OS_MS8..OS_MS128:
-                       a_loadmm_ref_reg(list,location^.size,location^.size,tmpref,location^.register,nil);
-                     else
-                       internalerror(2010053101);
+                     if sizeleft>=2 then
+                       begin
+                         tmpreg:=getintregister(list,location^.size);
+                         a_load_ref_reg(list,OS_16,location^.size,tmpref,tmpreg);
+                         dec(sizeleft,2);
+                         if reghasvalue then
+                           begin
+                             if target_info.endian=endian_big then
+                               a_op_const_reg(list,OP_SHL,location^.size,sizeleft*8,tmpreg)
+                             else
+                               a_op_const_reg(list,OP_SHL,location^.size,(orgsizeleft-(sizeleft+2))*8,tmpreg);
+                             a_op_reg_reg(list,OP_OR,location^.size,tmpreg,location^.register);
+                           end
+                         else
+                           begin
+                             if target_info.endian=endian_big then
+                               a_op_const_reg_reg(list,OP_SHL,location^.size,sizeleft*8,tmpreg,location^.register)
+                             else
+                               a_load_reg_reg(list,location^.size,location^.size,tmpreg,location^.register);
+                           end;
+                         inc(tmpref.offset,2);
+                         reghasvalue:=true;
+                       end;
+                     if sizeleft=1 then
+                       begin
+                         tmpreg:=getintregister(list,location^.size);
+                         a_load_ref_reg(list,OS_8,location^.size,tmpref,tmpreg);
+                         dec(sizeleft,1);
+                         if reghasvalue then
+                           begin
+                             if target_info.endian=endian_little then
+                               a_op_const_reg(list,OP_SHL,location^.size,(orgsizeleft-(sizeleft+1))*8,tmpreg);
+                             a_op_reg_reg(list,OP_OR,location^.size,tmpreg,location^.register)
+                           end
+                         else
+                           a_load_reg_reg(list,location^.size,location^.size,tmpreg,location^.register);
+                         inc(tmpref.offset);
+                       end;
+                     if location^.shiftval<0 then
+                       a_op_const_reg(list,OP_SHL,location^.size,-location^.shiftval,location^.register);
+                     { the loop will already adjust the offset and sizeleft }
+                     dec(tmpref.offset,orgsizeleft);
+                     sizeleft:=orgsizeleft;
                    end;
-                end;
-              LOC_FPUREGISTER,LOC_CFPUREGISTER:
-                begin
-                  a_loadfpu_ref_reg(list,size,location^.size,tmpref,location^.register);
-                end
-              else
-                internalerror(2010053111);
-            end;
-            inc(tmpref.offset,tcgsize2size[location^.size]);
-            dec(sizeleft,tcgsize2size[location^.size]);
-            location:=location^.next;
+              end;
+            LOC_REFERENCE,LOC_CREFERENCE:
+              begin
+                reference_reset_base(ref,location^.reference.index,location^.reference.offset,ctempposinvalid,newalignment(cgpara.alignment,cgpara.intsize-sizeleft),[]);
+                a_load_ref_cgparalocref(list,size,sizeleft,tmpref,ref,cgpara,location);
+              end;
+            LOC_MMREGISTER,LOC_CMMREGISTER:
+              begin
+                 case location^.size of
+                   OS_F32,
+                   OS_F64,
+                   OS_F128:
+                     a_loadmm_ref_reg(list,location^.size,location^.size,tmpref,location^.register,mms_movescalar);
+                   OS_M8..OS_M512:
+                     a_loadmm_ref_reg(list,location^.size,location^.size,tmpref,location^.register,nil);
+                   else
+                     internalerror(2010053101);
+                 end;
+              end;
+            LOC_FPUREGISTER,LOC_CFPUREGISTER:
+              begin
+                { can be not a float size in case of a record passed in fpu registers }
+                { the size comparison is to catch F128 passed in two 64 bit floating point registers }
+                if is_float_cgsize(size) and
+                   (tcgsize2size[location^.size]>=tcgsize2size[size]) then
+                  usesize:=size
+                else
+                  usesize:=location^.size;
+                a_loadfpu_ref_reg(list,usesize,location^.size,tmpref,location^.register);
+              end
+            else
+              internalerror(2010053111);
           end;
+          inc(tmpref.offset,tcgsize2size[location^.size]);
+          dec(sizeleft,tcgsize2size[location^.size]);
+          location:=location^.next;
+        until not assigned(location);
       end;
 
     procedure tcg.a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation);
@@ -1230,8 +1241,15 @@ implementation
                shifted to the top of the 4 resp. 8 byte register on the
                 caller side and needs to be stored with those bytes at the
                 start of the reference -> don't shift right }
-              else if (paraloc.shiftval<0) and
-                      ((-paraloc.shiftval) in [8,16,32]) then
+              else if (paraloc.shiftval<0)
+{$ifndef MIPS}
+{$ifdef CPU64BITALU}
+                      and ((-paraloc.shiftval) in [56{for byte},48{for two bytes},32{for four bytes}])
+{$else}
+                      and ((-paraloc.shiftval) in [24{for byte},16{for two bytes}])
+{$endif}
+{$endif}
+                  then
                 begin
                   a_op_const_reg_reg(list,OP_SHR,OS_INT,-paraloc.shiftval,paraloc.register,paraloc.register);
                   { convert to a register of 1/2/4 bytes in size, since the
@@ -1349,8 +1367,7 @@ implementation
                 OS_F64,
                 OS_F128:
                   a_loadmm_reg_ref(list,paraloc.size,paraloc.size,paraloc.register,ref,mms_movescalar);
-                OS_M8..OS_M128,
-                OS_MS8..OS_MS128:
+                OS_M8..OS_M512:
                   a_loadmm_reg_ref(list,paraloc.size,paraloc.size,paraloc.register,ref,nil);
                 else
                   internalerror(2010053102);
@@ -1406,8 +1423,7 @@ implementation
                        OS_F64,
                        OS_F128:
                         a_loadmm_reg_reg(list,paraloc.size,regsize,paraloc.register,reg,mms_movescalar);
-                       OS_M8..OS_M128,
-                       OS_MS8..OS_MS128:
+                       OS_M8..OS_M512:
                          a_loadmm_reg_reg(list,paraloc.size,paraloc.size,paraloc.register,reg,nil);
                        else
                          internalerror(2010053102);
@@ -1880,49 +1896,66 @@ implementation
 
     procedure tcg.a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const cgpara : TCGPara);
       var
-         href : treference;
-         hsize: tcgsize;
-         paraloc: PCGParaLocation;
+        srcref,
+        href : treference;
+        srcsize,
+        hsize: tcgsize;
+        paraloc: PCGParaLocation;
+        sizeleft: tcgint;
       begin
-         case cgpara.location^.loc of
-          LOC_FPUREGISTER,LOC_CFPUREGISTER:
-            begin
-              paramanager.alloccgpara(list,cgpara);
-              paraloc:=cgpara.location;
-              href:=ref;
-              while assigned(paraloc) do
-                begin
-                  if not(paraloc^.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
-                    internalerror(2015031501);
-                  a_loadfpu_ref_reg(list,paraloc^.size,paraloc^.size,href,paraloc^.register);
-                  inc(href.offset,tcgsize2size[paraloc^.size]);
-                  paraloc:=paraloc^.next;
-                end;
-            end;
-          LOC_REFERENCE,LOC_CREFERENCE:
-            begin
-              cgpara.check_simple_location;
-              reference_reset_base(href,cgpara.location^.reference.index,cgpara.location^.reference.offset,ctempposinvalid,cgpara.alignment,[]);
-              { concatcopy should choose the best way to copy the data }
-              g_concatcopy(list,ref,href,tcgsize2size[size]);
-            end;
-          LOC_REGISTER,LOC_CREGISTER:
-            begin
-              { force integer size }
-              hsize:=int_cgsize(tcgsize2size[size]);
-{$ifndef cpu64bitalu}
-              if (hsize in [OS_S64,OS_64]) then
-                cg64.a_load64_ref_cgpara(list,ref,cgpara)
-              else
-{$endif not cpu64bitalu}
-                begin
-                  cgpara.check_simple_location;
-                  a_load_ref_cgpara(list,hsize,ref,cgpara)
-                end;
-            end
-          else
-            internalerror(200402201);
-        end;
+        sizeleft:=cgpara.intsize; { passes the value at ref as parameter cgpara;
+                                    sizeleft counts the bytes of the parameter
+                                    that have not been handled yet }
+        paraloc:=cgpara.location;
+        paramanager.alloccgpara(list,cgpara);
+        srcref:=ref; { working copy of ref; its offset is advanced past each handled location }
+        repeat { one parameter location per iteration, following paraloc^.next until it is nil }
+          case paraloc^.loc of
+            LOC_FPUREGISTER,LOC_CFPUREGISTER:
+              begin
+                { destination: can be something different in case of a record passed in fpu registers }
+                if is_float_cgsize(paraloc^.size) then
+                  hsize:=paraloc^.size
+                else
+                  hsize:=int_float_cgsize(tcgsize2size[paraloc^.size]);
+                { source: the size comparison is to catch F128 passed in two 64 bit floating point registers }
+                if is_float_cgsize(size) and
+                   (tcgsize2size[size]<=tcgsize2size[paraloc^.size]) then
+                  srcsize:=size
+                else
+                  srcsize:=hsize;
+                a_loadfpu_ref_reg(list,srcsize,hsize,srcref,paraloc^.register);
+              end;
+            LOC_REFERENCE,LOC_CREFERENCE:
+              begin
+                if assigned(paraloc^.next) then { a memory location is only accepted as the last location }
+                  internalerror(2020050101);
+                reference_reset_base(href,paraloc^.reference.index,paraloc^.reference.offset,ctempposinvalid,newalignment(cgpara.alignment,cgpara.intsize-sizeleft),[]);
+                { concatcopy should choose the best way to copy the data }
+                g_concatcopy(list,srcref,href,sizeleft); { copies all sizeleft remaining bytes in one go }
+              end;
+            LOC_REGISTER,LOC_CREGISTER:
+              begin
+                { force integer size }
+                hsize:=int_cgsize(tcgsize2size[paraloc^.size]);
+  {$ifndef cpu64bitalu}
+                if (hsize in [OS_S64,OS_64]) then
+                  begin
+                    { if this is not a simple location, we'll have to add support to cg64 to load parts of a cgpara }
+                    cgpara.check_simple_location;
+                    cg64.a_load64_ref_cgpara(list,srcref,cgpara)
+                  end
+                else
+  {$endif not cpu64bitalu}
+                  begin
+                    a_load_ref_reg(list,hsize,hsize,srcref,paraloc^.register)
+                  end;
+              end
+            else
+              internalerror(200402201);
+          end;
+          inc(srcref.offset,tcgsize2size[paraloc^.size]); { step source offset and remaining size by the size of this location }
+          dec(sizeleft,tcgsize2size[paraloc^.size]);
+          paraloc:=paraloc^.next;
+        until not assigned(paraloc);
       end;
 
 
@@ -1987,17 +2020,19 @@ implementation
           end
         else
           tmpref:=ref;
-        tmpreg:=getintregister(list,size);
-        a_load_ref_reg(list,size,size,tmpref,tmpreg);
         if op in [OP_NEG,OP_NOT] then
           begin
-            if reg<>NR_NO then
-              internalerror(2017040901);
-            a_op_reg_reg(list,op,size,tmpreg,tmpreg);
+            tmpreg:=getintregister(list,size);
+            a_op_reg_reg(list,op,size,reg,tmpreg);
+            a_load_reg_ref(list,size,size,tmpreg,tmpref);
           end
         else
-          a_op_reg_reg(list,op,size,reg,tmpreg);
-        a_load_reg_ref(list,size,size,tmpreg,tmpref);
+          begin
+            tmpreg:=getintregister(list,size);
+            a_load_ref_reg(list,size,size,tmpref,tmpreg);
+            a_op_reg_reg(list,op,size,reg,tmpreg);
+            a_load_reg_ref(list,size,size,tmpreg,tmpref);
+          end;
       end;
 
 
@@ -2102,6 +2137,8 @@ implementation
                     a_load_const_reg(list,OS_8,0,dst);
                     exit;
                   end;
+                else
+                  ;
               end;
           end;
         OP_SHR:
@@ -2114,6 +2151,8 @@ implementation
                     a_load_const_reg(list,OS_8,0,GetNextReg(dst));
                     exit;
                   end;
+                else
+                  ;
               end;
           end;
 {$endif cpu8bitalu}
@@ -2194,6 +2233,49 @@ implementation
       end;
 
 
+    procedure tcg.a_op_reg(list: TAsmList; Op: TOpCG; size: TCGSize; reg: TRegister);
+      begin { single-operand form of a_op_reg_reg: reg is passed as both the
+              source and the destination operand }
+        if not (Op in [OP_NOT,OP_NEG]) then { only OP_NOT and OP_NEG are accepted here }
+          internalerror(2020050701);
+        a_op_reg_reg(list,op,size,reg,reg);
+      end;
+
+
+    procedure tcg.a_op_ref(list: TAsmList; Op: TOpCG; size: TCGSize; const ref: TReference);
+      var
+        tmpreg: TRegister;
+        tmpref: treference;
+      begin { applies OP_NOT/OP_NEG to a memory operand: the value is loaded
+              into a temporary register, operated on there and stored back
+              through the same reference }
+        if not (Op in [OP_NOT,OP_NEG]) then
+          internalerror(2020050701); { NOTE(review): same number as used in tcg.a_op_reg - confirm whether a unique one is wanted }
+        if assigned(ref.symbol) then { symbol based reference: take its address once and use a
+                                       plain base register reference for both load and store }
+          begin
+            tmpreg:=getaddressregister(list);
+            a_loadaddr_ref_reg(list,ref,tmpreg);
+            reference_reset_base(tmpref,tmpreg,0,ref.temppos,ref.alignment,[]);
+          end
+        else
+          tmpref:=ref;
+        tmpreg:=getintregister(list,size); { tmpreg is reassigned here; the address register was already handed to reference_reset_base }
+        a_load_ref_reg(list,size,size,tmpref,tmpreg);
+        a_op_reg_reg(list,op,size,tmpreg,tmpreg);
+        a_load_reg_ref(list,size,size,tmpreg,tmpref);
+      end;
+
+
+    procedure tcg.a_op_loc(list: TAsmList; Op: TOpCG; const loc: tlocation);
+      begin { dispatches on the kind of location: register locations go to
+              a_op_reg, memory references go to a_op_ref, both with the size
+              taken from loc; any other kind raises an internal error }
+        case loc.loc of
+          LOC_REGISTER, LOC_CREGISTER:
+            a_op_reg(list,op,loc.size,loc.register);
+          LOC_REFERENCE, LOC_CREFERENCE:
+            a_op_ref(list,op,loc.size,loc.reference);
+          else
+            internalerror(2020050702);
+        end;
+      end;
+
+
     procedure tcg.a_cmp_const_reg_label(list: TAsmList; size: tcgsize;
       cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);
       var
@@ -2770,7 +2852,7 @@ implementation
           system_i386_darwin,
           system_i386_iphonesim,
           system_powerpc64_darwin,
-          system_arm_darwin:
+          system_arm_ios:
             begin
               nlsymname:='L'+symname+'$non_lazy_ptr';
               l:=current_asmdata.getasmsymbol(nlsymname);
@@ -2817,6 +2899,8 @@ implementation
     procedure tcg.g_call(list: TAsmList;const s: string);
       begin
         allocallcpuregisters(list);
+        if systemunit<>current_module.globalsymtable then { skipped when the current module's global symtable is the system unit itself }
+          current_module.add_extern_asmsym(s,AB_EXTERNAL,AT_FUNCTION); { records s with the current module as AB_EXTERNAL/AT_FUNCTION before the call is emitted }
         a_call_name(list,s,false);
         deallocallcpuregisters(list);
       end;
@@ -3048,6 +3132,41 @@ implementation
       end;
 
 
+    procedure tcg64.a_op64_reg(list: TAsmList; op: TOpCG; size: tcgsize; regdst: tregister64);
+      begin { single-operand form of a_op64_reg_reg: regdst is passed as both
+              the source and the destination register pair }
+        if not (op in [OP_NOT,OP_NEG]) then { only OP_NOT and OP_NEG are accepted here }
+          internalerror(2020050706);
+        a_op64_reg_reg(list,op,size,regdst,regdst);
+      end;
+
+
+    procedure tcg64.a_op64_ref(list: TAsmList; op: TOpCG; size: tcgsize; const ref: treference);
+      var
+        tempreg: tregister64;
+      begin { applies OP_NOT/OP_NEG to a 64 bit memory operand: load into a
+              temporary register pair, operate on it, store back to ref }
+        if not (op in [OP_NOT,OP_NEG]) then
+          internalerror(2020050706); { NOTE(review): same number as used in tcg64.a_op64_reg - confirm whether a unique one is wanted }
+        tempreg.reghi:=cg.getintregister(list,OS_32); { both halves are allocated as separate OS_32 registers }
+        tempreg.reglo:=cg.getintregister(list,OS_32);
+        a_load64_ref_reg(list,ref,tempreg);
+        a_op64_reg_reg(list,op,size,tempreg,tempreg);
+        a_load64_reg_ref(list,tempreg,ref);
+      end;
+
+
+    procedure tcg64.a_op64_loc(list: TAsmList; op: TOpCG; size: tcgsize; const l: tlocation);
+      begin { dispatches on the kind of location: memory references go to
+              a_op64_ref, register locations go to a_op64_reg using
+              l.register64; any other kind raises an internal error }
+        case l.loc of
+          LOC_REFERENCE, LOC_CREFERENCE:
+            a_op64_ref(list,op,size,l.reference);
+          LOC_REGISTER,LOC_CREGISTER:
+            a_op64_reg(list,op,size,l.register64);
+          else
+            internalerror(2020050707);
+        end;
+      end;
+
+
     procedure tcg64.a_load64_loc_subsetref(list : TAsmList;const l: tlocation; const sref : tsubsetreference);
       begin
         case l.loc of

+ 1 - 2
compiler/comphook.pas

@@ -305,8 +305,7 @@ begin
      if status.currentcolumn>0 then
       begin
         if status.use_gccoutput then
-          hs:=gccfilename(status.currentsource)+':'+tostr(status.currentline)+': '+hs+' '+
-              tostr(status.currentcolumn)+': '+s
+          hs:=gccfilename(status.currentsource)+':'+tostr(status.currentline)+':'+tostr(status.currentcolumn)+': '+hs+' '+s
         else
           begin
             hs:=status.currentsource+'('+tostr(status.currentline)+

+ 13 - 3
compiler/compiler.pas

@@ -81,21 +81,31 @@ uses
 {$ifdef beos}
   ,i_beos
 {$endif beos}
-{$ifdef fbsd}
-  ,i_fbsd
-{$endif fbsd}
+{$ifdef bsd}
+{$ifdef darwin}
+  ,i_darwin
+{$else darwin}
+  ,i_bsd
+{$endif darwin}
+{$endif bsd}
 {$ifdef gba}
   ,i_gba
 {$endif gba}
 {$ifdef go32v2}
   ,i_go32v2
 {$endif go32v2}
+{$ifdef haiku}
+  ,i_haiku
+{$endif haiku}
 {$ifdef linux}
   ,i_linux
 {$endif linux}
 {$ifdef macos}
   ,i_macos
 {$endif macos}
+{$ifdef morphos}
+  ,i_morph
+{$endif morphos}
 {$ifdef nds}
   ,i_nds
 {$endif nds}

+ 12 - 2
compiler/compinnr.pas

@@ -21,7 +21,9 @@ unit compinnr;
 interface
 
 const
-  fpc_in_cpu_first   = 10000;
+  { this file needs to be kept in sync with rtl/inc/innr.in }
+  in_cpu_first   = 10000;
+  in_x86_mm_first    = 11000;
 
 type
    tinlinenumber=(
@@ -118,6 +120,7 @@ type
      in_gettypekind_x     = 96,
      in_faraddr_x         = 97,
      in_volatile_x        = 98,
+     in_ismanagedtype_x   = 99,
 
 { Internal constant functions }
      in_const_sqr        = 100,
@@ -160,12 +163,15 @@ type
      in_mmx_pcmpeqd      = 202,
      in_mmx_pcmpgtb      = 203,
      in_mmx_pcmpgtw      = 204,
-     in_mmx_pcmpgtd      = 205
+     in_mmx_pcmpgtd      = 205,
 
      { 3DNow }
 
      { SSE }
 
+{ More internal functions }
+     in_isconstvalue_x    = 1000
+
 {$if defined(X86)}
      ,
      {$i x86/cx86innr.inc}
@@ -174,6 +180,10 @@ type
      ,
      {$i ccpuinnr.inc}
 {$endif }
+{$if defined(Z80)}
+     ,
+     {$i ccpuinnr.inc}
+{$endif}
    );
 
 implementation

Деякі файли не було показано, через те що забагато файлів було змінено